From f6f76462c24ded8fea8e6ca5f9792809d5e566d6 Mon Sep 17 00:00:00 2001 From: Marius Grama Date: Tue, 21 Nov 2023 11:00:55 +0100 Subject: [PATCH] Prune unused stats columns when reading Delta checkpoint Add support for stats projection in Delta checkpoint iterator --- .../plugin/deltalake/DeltaLakeMetadata.java | 8 +- .../deltalake/DeltaLakeSplitManager.java | 8 +- .../FileBasedTableStatisticsProvider.java | 8 +- .../transactionlog/TableSnapshot.java | 13 +- .../transactionlog/TransactionLogAccess.java | 49 ++++- .../checkpoint/CheckpointEntryIterator.java | 27 ++- .../checkpoint/CheckpointSchemaManager.java | 12 +- .../checkpoint/CheckpointWriter.java | 9 +- .../checkpoint/CheckpointWriterManager.java | 7 +- .../plugin/deltalake/TestDeltaLakeBasic.java | 74 +++++++ .../deltalake/TestDeltaLakeSplitManager.java | 9 +- .../transactionlog/TestTableSnapshot.java | 21 +- .../TestCheckpointEntryIterator.java | 203 ++++++++++++++++-- .../checkpoint/TestCheckpointWriter.java | 4 +- .../TestDeltaLakeFileStatistics.java | 10 +- .../parsed_stats_case_sensitive/README.txt | 13 ++ .../_delta_log/00000000000000000000.json | 3 + .../_delta_log/00000000000000000001.json | 3 + .../00000000000000000002.checkpoint.parquet | Bin 0 -> 20131 bytes .../_delta_log/00000000000000000002.json | 2 + .../_delta_log/00000000000000000003.json | 2 + .../_delta_log/_last_checkpoint | 1 + ...4722-9837-4b11599ea66d.c000.snappy.parquet | Bin 0 -> 832 bytes ...4e0a-bf25-eba86bfdce11.c000.snappy.parquet | Bin 0 -> 839 bytes ...4a58-8c2c-d775a1203dd3.c000.snappy.parquet | Bin 0 -> 839 bytes ...4d32-be2f-9051e90920ce.c000.snappy.parquet | Bin 0 -> 839 bytes .../parsed_stats_struct/README.txt | 14 ++ .../_delta_log/00000000000000000000.json | 3 + .../_delta_log/00000000000000000001.json | 3 + .../00000000000000000002.checkpoint.parquet | Bin 0 -> 21976 bytes .../_delta_log/00000000000000000002.json | 2 + .../_delta_log/00000000000000000003.json | 2 + .../_delta_log/_last_checkpoint | 1 + 
...4039-920b-d1f24022685e.c000.snappy.parquet | Bin 0 -> 1161 bytes ...44b1-a897-4b0e7d009ae4.c000.snappy.parquet | Bin 0 -> 1168 bytes ...4848-9d25-03962fc0e540.c000.snappy.parquet | Bin 0 -> 1167 bytes ...4848-9853-f82afd408710.c000.snappy.parquet | Bin 0 -> 1168 bytes 37 files changed, 463 insertions(+), 48 deletions(-) create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/README.txt create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000000.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000001.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000002.checkpoint.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000002.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000003.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/_last_checkpoint create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=100/part-00000-c707739f-45c0-4722-9837-4b11599ea66d.c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=200/part-00000-9cfa2c9c-efae-4e0a-bf25-eba86bfdce11.c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=300/part-00000-7b5e9086-abfc-4a58-8c2c-d775a1203dd3.c000.snappy.parquet create mode 100644 
plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=400/part-00000-e90e48ef-2b52-4d32-be2f-9051e90920ce.c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/README.txt create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000000.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000001.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000002.checkpoint.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000002.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000003.json create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/_last_checkpoint create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=100/part-00000-7d5755b3-20b1-4039-920b-d1f24022685e.c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=200/part-00000-22aeb477-f838-44b1-a897-4b0e7d009ae4.c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=300/part-00000-d9fc223e-4feb-4848-9d25-03962fc0e540.c000.snappy.parquet create mode 100644 plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=400/part-00000-da80387a-f286-4848-9853-f82afd408710.c000.snappy.parquet diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java index 6d663016204c..8f9b8f5eb07d 100644 --- 
a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeMetadata.java @@ -3546,7 +3546,13 @@ private OptionalLong executeDelete(ConnectorSession session, ConnectorTableHandl private List getAddFileEntriesMatchingEnforcedPartitionConstraint(ConnectorSession session, DeltaLakeTableHandle tableHandle) { TableSnapshot tableSnapshot = getSnapshot(session, tableHandle); - List validDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), tableHandle.getEnforcedPartitionConstraint(), session); + List validDataFiles = transactionLogAccess.getActiveFiles( + tableSnapshot, + tableHandle.getMetadataEntry(), + tableHandle.getProtocolEntry(), + tableHandle.getEnforcedPartitionConstraint(), + tableHandle.getProjectedColumns(), + session); TupleDomain enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint(); if (enforcedPartitionConstraint.isAll()) { return validDataFiles; diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java index 606ab5d55ce1..8f6f48f247db 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/DeltaLakeSplitManager.java @@ -154,7 +154,13 @@ private Stream getSplits( { TableSnapshot tableSnapshot = deltaLakeTransactionManager.get(transaction, session.getIdentity()) .getSnapshot(session, tableHandle.getSchemaTableName(), tableHandle.getLocation(), tableHandle.getReadVersion()); - List validDataFiles = transactionLogAccess.getActiveFiles(tableSnapshot, tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), tableHandle.getEnforcedPartitionConstraint(), session); + List validDataFiles = 
transactionLogAccess.getActiveFiles( + tableSnapshot, + tableHandle.getMetadataEntry(), + tableHandle.getProtocolEntry(), + tableHandle.getEnforcedPartitionConstraint(), + tableHandle.getProjectedColumns(), + session); TupleDomain enforcedPartitionConstraint = tableHandle.getEnforcedPartitionConstraint(); TupleDomain nonPartitionConstraint = tableHandle.getNonPartitionConstraint(); Domain pathDomain = getPathDomain(nonPartitionConstraint); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/statistics/FileBasedTableStatisticsProvider.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/statistics/FileBasedTableStatisticsProvider.java index eddcfcb99f84..d91b428fc2c6 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/statistics/FileBasedTableStatisticsProvider.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/statistics/FileBasedTableStatisticsProvider.java @@ -110,7 +110,13 @@ public TableStatistics getTableStatistics(ConnectorSession session, DeltaLakeTab .filter(column -> predicatedColumnNames.contains(column.getName())) .collect(toImmutableList()); - for (AddFileEntry addEntry : transactionLogAccess.getActiveFiles(tableSnapshot, tableHandle.getMetadataEntry(), tableHandle.getProtocolEntry(), session)) { + for (AddFileEntry addEntry : transactionLogAccess.getActiveFiles( + tableSnapshot, + tableHandle.getMetadataEntry(), + tableHandle.getProtocolEntry(), + tableHandle.getEnforcedPartitionConstraint(), + tableHandle.getProjectedColumns(), + session)) { Optional fileStatistics = addEntry.getStats(); if (fileStatistics.isEmpty()) { // Open source Delta Lake does not collect stats diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java index ba4fc8c37a40..bdf2f428dc0f 100644 --- 
a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TableSnapshot.java @@ -36,6 +36,7 @@ import java.util.List; import java.util.Optional; import java.util.Set; +import java.util.function.Predicate; import java.util.stream.Stream; import static com.google.common.base.Preconditions.checkState; @@ -181,7 +182,8 @@ public Stream getCheckpointTransactionLogEntries( TrinoFileSystem fileSystem, FileFormatDataSourceStats stats, Optional metadataAndProtocol, - TupleDomain partitionConstraint) + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter) throws IOException { if (lastCheckpoint.isEmpty()) { @@ -210,7 +212,8 @@ public Stream getCheckpointTransactionLogEntries( stats, checkpoint, checkpointFile, - partitionConstraint))); + partitionConstraint, + addStatsMinMaxColumnFilter))); } return resultStream; } @@ -230,7 +233,8 @@ private Iterator getCheckpointTransactionLogEntrie FileFormatDataSourceStats stats, LastCheckpoint checkpoint, TrinoInputFile checkpointFile, - TupleDomain partitionConstraint) + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter) throws IOException { long fileSize; @@ -253,7 +257,8 @@ private Iterator getCheckpointTransactionLogEntrie parquetReaderOptions, checkpointRowStatisticsWritingEnabled, domainCompactionThreshold, - partitionConstraint); + partitionConstraint, + addStatsMinMaxColumnFilter); } public record MetadataAndProtocolEntry(MetadataEntry metadataEntry, ProtocolEntry protocolEntry) diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java index b503a27c28cb..f308865e90b4 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java +++ 
b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/TransactionLogAccess.java @@ -68,11 +68,15 @@ import java.util.concurrent.TimeUnit; import java.util.function.BiFunction; import java.util.function.Function; +import java.util.function.Predicate; import java.util.stream.Stream; +import static com.google.common.base.Predicates.alwaysFalse; +import static com.google.common.base.Predicates.alwaysTrue; import static com.google.common.base.Throwables.throwIfUnchecked; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; +import static com.google.common.collect.ImmutableSet.toImmutableSet; import static io.airlift.slice.SizeOf.estimatedSizeOf; import static io.airlift.slice.SizeOf.instanceSize; import static io.trino.cache.CacheUtils.invalidateAllIf; @@ -223,17 +227,48 @@ public MetadataEntry getMetadataEntry(TableSnapshot tableSnapshot, ConnectorSess .orElseThrow(() -> new TrinoException(DELTA_LAKE_INVALID_SCHEMA, "Metadata not found in transaction log for " + tableSnapshot.getTable())); } + // Deprecated in favor of the namesake method which allows checkpoint filtering + // to be able to perform partition pruning and stats projection on the `add` entries + // from the checkpoint. 
+ /** + * @deprecated Use {@link #getActiveFiles(TableSnapshot, MetadataEntry, ProtocolEntry, TupleDomain, Optional, ConnectorSession)}; this overload applies no partition constraint and keeps min/max stats for all columns. + */ @Deprecated public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, ConnectorSession session) { - return getActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), session); + return retrieveActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), Optional.of(alwaysTrue()), session); + } + + public List getActiveFiles( + TableSnapshot tableSnapshot, + MetadataEntry metadataEntry, + ProtocolEntry protocolEntry, + TupleDomain partitionConstraint, + Optional> projectedColumns, + ConnectorSession session) + { + Optional> addStatsMinMaxColumnFilter = Optional.of(alwaysFalse()); + if (projectedColumns.isPresent()) { + Set baseColumnNames = projectedColumns.get().stream() + .filter(DeltaLakeColumnHandle::isBaseColumn) // Only base column stats are supported + .map(DeltaLakeColumnHandle::getColumnName) + .collect(toImmutableSet()); + addStatsMinMaxColumnFilter = Optional.of(baseColumnNames::contains); + } + return retrieveActiveFiles(tableSnapshot, metadataEntry, protocolEntry, partitionConstraint, addStatsMinMaxColumnFilter, session); } - public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, TupleDomain partitionConstraint, ConnectorSession session) + private List retrieveActiveFiles( + TableSnapshot tableSnapshot, + MetadataEntry metadataEntry, + ProtocolEntry protocolEntry, + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter, + ConnectorSession session) { try { if (isCheckpointFilteringEnabled(session)) { - return loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, partitionConstraint, session).stream() + return loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, partitionConstraint, addStatsMinMaxColumnFilter, session).stream() .collect(toImmutableList()); } @@ 
-264,7 +299,7 @@ public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEn } } - List activeFiles = loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), session); + List activeFiles = loadActiveFiles(tableSnapshot, metadataEntry, protocolEntry, TupleDomain.all(), Optional.of(alwaysTrue()), session); return new DeltaLakeDataFileCacheEntry(tableSnapshot.getVersion(), activeFiles); }); return cacheEntry.getActiveFiles(); @@ -279,6 +314,7 @@ private List loadActiveFiles( MetadataEntry metadataEntry, ProtocolEntry protocolEntry, TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter, ConnectorSession session) { List transactions = tableSnapshot.getTransactions(); @@ -290,7 +326,8 @@ private List loadActiveFiles( fileSystemFactory.create(session), fileFormatDataSourceStats, Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), - partitionConstraint)) { + partitionConstraint, + addStatsMinMaxColumnFilter)) { return activeAddEntries(checkpointEntries, transactions) .filter(partitionConstraint.isAll() ? 
addAction -> true @@ -433,7 +470,7 @@ private Stream getEntries( List transactions = tableSnapshot.getTransactions(); // Passing TupleDomain.all() because this method is used for getting all entries Stream checkpointEntries = tableSnapshot.getCheckpointTransactionLogEntries( - session, entryTypes, checkpointSchemaManager, typeManager, fileSystem, stats, Optional.empty(), TupleDomain.all()); + session, entryTypes, checkpointSchemaManager, typeManager, fileSystem, stats, Optional.empty(), TupleDomain.all(), Optional.of(alwaysTrue())); return entryMapper.apply( checkpointEntries, diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java index 95b4ddb1fe26..9f0e4ea608e1 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointEntryIterator.java @@ -69,6 +69,7 @@ import java.util.OptionalLong; import java.util.Queue; import java.util.Set; +import java.util.function.Predicate; import static com.google.common.base.Preconditions.checkArgument; import static com.google.common.base.Verify.verify; @@ -159,7 +160,8 @@ public CheckpointEntryIterator( ParquetReaderOptions parquetReaderOptions, boolean checkpointRowStatisticsWritingEnabled, int domainCompactionThreshold, - TupleDomain partitionConstraint) + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter) { this.checkpointPath = checkpoint.location().toString(); this.session = requireNonNull(session, "session is null"); @@ -167,6 +169,7 @@ public CheckpointEntryIterator( this.stringMap = (MapType) typeManager.getType(TypeSignature.mapType(VARCHAR.getTypeSignature(), VARCHAR.getTypeSignature())); this.checkpointRowStatisticsWritingEnabled 
= checkpointRowStatisticsWritingEnabled; this.partitionConstraint = requireNonNull(partitionConstraint, "partitionConstraint is null"); + requireNonNull(addStatsMinMaxColumnFilter, "addStatsMinMaxColumnFilter is null"); checkArgument(!fields.isEmpty(), "fields is empty"); Map extractors = ImmutableMap.builder() .put(TRANSACTION, this::buildTxnEntry) @@ -182,14 +185,19 @@ public CheckpointEntryIterator( this.metadataEntry = metadataEntry.get(); checkArgument(protocolEntry.isPresent(), "Protocol entry must be provided when reading ADD entries from Checkpoint files"); this.protocolEntry = protocolEntry.get(); + checkArgument(addStatsMinMaxColumnFilter.isPresent(), "addStatsMinMaxColumnFilter must be provided when reading ADD entries from Checkpoint files"); this.schema = extractSchema(this.metadataEntry, this.protocolEntry, typeManager); this.columnsWithMinMaxStats = columnsWithStats(schema, this.metadataEntry.getOriginalPartitionColumns()); + Predicate columnStatsFilterFunction = addStatsMinMaxColumnFilter.orElseThrow(); + this.columnsWithMinMaxStats = columnsWithMinMaxStats.stream() + .filter(column -> columnStatsFilterFunction.test(column.getName())) + .collect(toImmutableList()); } ImmutableList.Builder columnsBuilder = ImmutableList.builderWithExpectedSize(fields.size()); ImmutableList.Builder> disjunctDomainsBuilder = ImmutableList.builderWithExpectedSize(fields.size()); for (EntryType field : fields) { - HiveColumnHandle column = buildColumnHandle(field, checkpointSchemaManager, this.metadataEntry, this.protocolEntry).toHiveColumnHandle(); + HiveColumnHandle column = buildColumnHandle(field, checkpointSchemaManager, this.metadataEntry, this.protocolEntry, addStatsMinMaxColumnFilter).toHiveColumnHandle(); columnsBuilder.add(column); disjunctDomainsBuilder.add(buildTupleDomainColumnHandle(field, column)); } @@ -220,11 +228,16 @@ public CheckpointEntryIterator( .collect(toImmutableList()); } - private DeltaLakeColumnHandle buildColumnHandle(EntryType entryType, 
CheckpointSchemaManager schemaManager, MetadataEntry metadataEntry, ProtocolEntry protocolEntry) + private DeltaLakeColumnHandle buildColumnHandle( + EntryType entryType, + CheckpointSchemaManager schemaManager, + MetadataEntry metadataEntry, + ProtocolEntry protocolEntry, + Optional> addStatsMinMaxColumnFilter) { Type type = switch (entryType) { case TRANSACTION -> schemaManager.getTxnEntryType(); - case ADD -> schemaManager.getAddEntryType(metadataEntry, protocolEntry, true, true, true); + case ADD -> schemaManager.getAddEntryType(metadataEntry, protocolEntry, addStatsMinMaxColumnFilter.orElseThrow(), true, true, true); case REMOVE -> schemaManager.getRemoveEntryType(); case METADATA -> schemaManager.getMetadataEntryType(); case PROTOCOL -> schemaManager.getProtocolEntryType(true, true); @@ -696,6 +709,12 @@ OptionalLong getCompletedPositions() return pageSource.getCompletedPositions(); } + @VisibleForTesting + long getCompletedBytes() + { + return pageSource.getCompletedBytes(); + } + @FunctionalInterface public interface CheckPointFieldExtractor { diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java index 3fb8df2d5b69..73a9fa04ad74 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointSchemaManager.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Optional; +import java.util.function.Predicate; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.deltalake.transactionlog.DeltaLakeSchemaSupport.extractPartitionColumns; @@ -114,10 +115,19 @@ public RowType getMetadataEntryType() return metadataEntryType; } - public RowType 
getAddEntryType(MetadataEntry metadataEntry, ProtocolEntry protocolEntry, boolean requireWriteStatsAsJson, boolean requireWriteStatsAsStruct, boolean usePartitionValuesParsed) + public RowType getAddEntryType( + MetadataEntry metadataEntry, + ProtocolEntry protocolEntry, + Predicate addStatsMinMaxColumnFilter, + boolean requireWriteStatsAsJson, + boolean requireWriteStatsAsStruct, + boolean usePartitionValuesParsed) { List allColumns = extractSchema(metadataEntry, protocolEntry, typeManager); List minMaxColumns = columnsWithStats(metadataEntry, protocolEntry, typeManager); + minMaxColumns = minMaxColumns.stream() + .filter(column -> addStatsMinMaxColumnFilter.test(column.getName())) + .collect(toImmutableList()); boolean deletionVectorEnabled = isDeletionVectorEnabled(metadataEntry, protocolEntry); ImmutableList.Builder minMaxFields = ImmutableList.builder(); diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java index b8d0c1e38c74..18f522fabb57 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriter.java @@ -52,6 +52,7 @@ import java.util.Optional; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Predicates.alwaysTrue; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableMap.toImmutableMap; import static io.airlift.slice.Slices.utf8Slice; @@ -112,7 +113,13 @@ public void write(CheckpointEntries entries, TrinoOutputFile outputFile) RowType protocolEntryType = checkpointSchemaManager.getProtocolEntryType(protocolEntry.getReaderFeatures().isPresent(), 
protocolEntry.getWriterFeatures().isPresent()); RowType txnEntryType = checkpointSchemaManager.getTxnEntryType(); // TODO https://github.com/trinodb/trino/issues/19586 Add support for writing 'partitionValues_parsed' field - RowType addEntryType = checkpointSchemaManager.getAddEntryType(entries.getMetadataEntry(), entries.getProtocolEntry(), writeStatsAsJson, writeStatsAsStruct, false); + RowType addEntryType = checkpointSchemaManager.getAddEntryType( + entries.getMetadataEntry(), + entries.getProtocolEntry(), + alwaysTrue(), + writeStatsAsJson, + writeStatsAsStruct, + false); RowType removeEntryType = checkpointSchemaManager.getRemoveEntryType(); List columnNames = ImmutableList.of( diff --git a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java index 0c797dc87c22..f70a24c22ebf 100644 --- a/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java +++ b/plugin/trino-delta-lake/src/main/java/io/trino/plugin/deltalake/transactionlog/checkpoint/CheckpointWriterManager.java @@ -39,6 +39,7 @@ import java.util.Optional; import static com.google.common.base.Preconditions.checkArgument; +import static com.google.common.base.Predicates.alwaysTrue; import static com.google.common.collect.ImmutableList.toImmutableList; import static io.trino.plugin.deltalake.DeltaLakeErrorCode.DELTA_LAKE_INVALID_SCHEMA; import static io.trino.plugin.deltalake.transactionlog.TransactionLogParser.LAST_CHECKPOINT_FILENAME; @@ -105,7 +106,8 @@ public void writeCheckpoint(ConnectorSession session, TableSnapshot snapshot) fileSystem, fileFormatDataSourceStats, Optional.empty(), - TupleDomain.all()) + TupleDomain.all(), + Optional.empty()) .filter(entry -> entry.getMetaData() != null || entry.getProtocol() != null) .collect(toImmutableList()); @@ -138,7 
+140,8 @@ public void writeCheckpoint(ConnectorSession session, TableSnapshot snapshot) fileSystem, fileFormatDataSourceStats, Optional.of(new MetadataAndProtocolEntry(metadataLogEntry.getMetaData(), protocolLogEntry.getProtocol())), - TupleDomain.all()) + TupleDomain.all(), + Optional.of(alwaysTrue())) .forEach(checkpointBuilder::addLogEntry); } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java index 63083a1e4a14..4f3f55503043 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeBasic.java @@ -1084,6 +1084,80 @@ private void testPartitionValuesParsed(String resourceName) .returnsEmptyResult(); } + /** + * @see databricks133.parsed_stats_struct + */ + @Test + public void testCheckpointFilteringForParsedStatsContainingNestedRows() + throws Exception + { + String tableName = "test_parsed_stats_struct_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource("databricks133/parsed_stats_struct").toURI()).toPath(), tableLocation); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertThat(query("SELECT * FROM " + tableName)) + .skippingTypesCheck() + .matches(""" + VALUES + (100, 1, row(1, 'ala')), + (200, 2, row(2, 'kota')), + (300, 3, row(3, 'osla')), + (400, 4, row(4, 'zulu'))"""); + + Session session = Session.builder(getQueryRunner().getDefaultSession()) + .setCatalogSessionProperty("delta", "checkpoint_filtering_enabled", "true") + .build(); + assertThat(query(session, "SELECT id FROM " + tableName + " WHERE part BETWEEN 100 AND 300")).matches("VALUES 1, 2, 3"); + assertThat(query(session, "SELECT root.entry_two 
FROM " + tableName + " WHERE part BETWEEN 100 AND 300")) + .skippingTypesCheck() + .matches("VALUES 'ala', 'kota', 'osla'"); + // show stats with predicate + assertThat(query(session, "SHOW STATS FOR (SELECT id FROM " + tableName + " WHERE part = 100)")) + .skippingTypesCheck() + .matches(""" + VALUES + ('id', NULL, NULL, DOUBLE '0.0' , NULL, '1', '1'), + (NULL, NULL, NULL, NULL, DOUBLE '1.0', NULL, NULL)"""); + } + + /** + * @see databricks133.parsed_stats_case_sensitive + */ + @Test + public void testCheckpointFilteringForParsedStatsWithCaseSensitiveColumnNames() + throws Exception + { + String tableName = "test_parsed_stats_case_sensitive_" + randomNameSuffix(); + Path tableLocation = Files.createTempFile(tableName, null); + copyDirectoryContents(new File(Resources.getResource("databricks133/parsed_stats_case_sensitive").toURI()).toPath(), tableLocation); + + assertUpdate("CALL system.register_table('%s', '%s', '%s')".formatted(getSession().getSchema().orElseThrow(), tableName, tableLocation.toUri())); + assertThat(query("SELECT * FROM " + tableName)) + .skippingTypesCheck() + .matches(""" + VALUES + (100, 1, 'ala'), + (200, 2, 'kota'), + (300, 3, 'osla'), + (400, 4, 'zulu')"""); + + Session session = Session.builder(getQueryRunner().getDefaultSession()) + .setCatalogSessionProperty("delta", "checkpoint_filtering_enabled", "true") + .build(); + assertThat(query(session, "SELECT a_NuMbEr FROM " + tableName + " WHERE part BETWEEN 100 AND 300")).matches("VALUES 1, 2, 3"); + assertThat(query(session, "SELECT a_StRiNg FROM " + tableName + " WHERE part BETWEEN 100 AND 300")) + .skippingTypesCheck() + .matches("VALUES 'ala', 'kota', 'osla'"); + // show stats with predicate + assertThat(query(session, "SHOW STATS FOR (SELECT a_NuMbEr FROM " + tableName + " WHERE part BETWEEN 100 AND 300)")) + .skippingTypesCheck() + .matches(""" + VALUES + ('a_NuMbEr', NULL, NULL, DOUBLE '0.0' , NULL, '1', '3'), + (NULL, NULL, NULL, NULL, DOUBLE '3.0', NULL, NULL)"""); + } + /** * @see 
deltalake.partition_values_parsed_all_types */ diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java index 5c9f3dd08da9..20ba4c7e2343 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/TestDeltaLakeSplitManager.java @@ -56,6 +56,7 @@ import java.util.List; import java.util.Optional; +import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.stream.Collectors; @@ -185,7 +186,13 @@ private DeltaLakeSplitManager setupSplitManager(List addFileEntrie new ParquetReaderConfig()) { @Override - public List getActiveFiles(TableSnapshot tableSnapshot, MetadataEntry metadataEntry, ProtocolEntry protocolEntry, TupleDomain partitionConstraint, ConnectorSession session) + public List getActiveFiles( + TableSnapshot tableSnapshot, + MetadataEntry metadataEntry, + ProtocolEntry protocolEntry, + TupleDomain partitionConstraint, + Optional> projectedColumns, + ConnectorSession session) { return addFileEntries; } diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java index 95c1869a3d37..dbc19f558368 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/TestTableSnapshot.java @@ -43,6 +43,7 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Stream; +import static com.google.common.base.Predicates.alwaysTrue; import static com.google.common.collect.ImmutableList.toImmutableList; import static 
io.trino.filesystem.TrackingFileSystemFactory.OperationType.INPUT_FILE_NEW_STREAM; import static io.trino.plugin.deltalake.transactionlog.TableSnapshot.MetadataAndProtocolEntry; @@ -142,7 +143,15 @@ public void readsCheckpointFile() ProtocolEntry protocolEntry = transactionLogAccess.getProtocolEntry(SESSION, tableSnapshot); tableSnapshot.setCachedMetadata(Optional.of(metadataEntry)); try (Stream stream = tableSnapshot.getCheckpointTransactionLogEntries( - SESSION, ImmutableSet.of(ADD), checkpointSchemaManager, TESTING_TYPE_MANAGER, trackingFileSystem, new FileFormatDataSourceStats(), Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), TupleDomain.all())) { + SESSION, + ImmutableSet.of(ADD), + checkpointSchemaManager, + TESTING_TYPE_MANAGER, + trackingFileSystem, + new FileFormatDataSourceStats(), + Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), + TupleDomain.all(), + Optional.of(alwaysTrue()))) { List entries = stream.collect(toImmutableList()); assertThat(entries).hasSize(9); @@ -184,7 +193,15 @@ public void readsCheckpointFile() // lets read two entry types in one call; add and protocol try (Stream stream = tableSnapshot.getCheckpointTransactionLogEntries( - SESSION, ImmutableSet.of(ADD, PROTOCOL), checkpointSchemaManager, TESTING_TYPE_MANAGER, trackingFileSystem, new FileFormatDataSourceStats(), Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), TupleDomain.all())) { + SESSION, + ImmutableSet.of(ADD, PROTOCOL), + checkpointSchemaManager, + TESTING_TYPE_MANAGER, + trackingFileSystem, + new FileFormatDataSourceStats(), + Optional.of(new MetadataAndProtocolEntry(metadataEntry, protocolEntry)), + TupleDomain.all(), + Optional.of(alwaysTrue()))) { List entries = stream.collect(toImmutableList()); assertThat(entries).hasSize(10); diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java 
b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java index 245b602ad40b..3857f0e72d7d 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointEntryIterator.java @@ -31,6 +31,7 @@ import io.trino.plugin.deltalake.transactionlog.MetadataEntry; import io.trino.plugin.deltalake.transactionlog.ProtocolEntry; import io.trino.plugin.deltalake.transactionlog.RemoveFileEntry; +import io.trino.plugin.deltalake.transactionlog.statistics.DeltaLakeParquetFileStatistics; import io.trino.plugin.hive.FileFormatDataSourceStats; import io.trino.plugin.hive.parquet.ParquetReaderConfig; import io.trino.spi.predicate.TupleDomain; @@ -53,14 +54,20 @@ import java.util.Objects; import java.util.Optional; import java.util.OptionalInt; +import java.util.Random; import java.util.Set; import java.util.UUID; +import java.util.function.Predicate; +import java.util.stream.Collectors; import java.util.stream.IntStream; +import static com.google.common.base.Predicates.alwaysTrue; +import static com.google.common.collect.ImmutableMap.toImmutableMap; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.io.Resources.getResource; import static com.google.common.math.LongMath.divide; import static io.airlift.slice.Slices.utf8Slice; +import static io.airlift.units.DataSize.Unit.KILOBYTE; import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; import static io.trino.plugin.deltalake.DeltaTestingConnectorSession.SESSION; import static io.trino.plugin.deltalake.transactionlog.checkpoint.CheckpointEntryIterator.EntryType.ADD; @@ -124,7 +131,7 @@ public void testReadNoEntries() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - assertThatThrownBy(() -> 
createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(), Optional.empty(), Optional.empty(), TupleDomain.all())) + assertThatThrownBy(() -> createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(), Optional.empty(), Optional.empty(), TupleDomain.all(), Optional.empty())) .isInstanceOf(IllegalArgumentException.class) .hasMessage("fields is empty"); } @@ -168,7 +175,7 @@ public void testReadProtocolEntries() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all(), Optional.empty()); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(1); @@ -186,7 +193,13 @@ public void testReadMetadataAndProtocolEntry() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(METADATA, PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(METADATA, PROTOCOL), + Optional.empty(), + Optional.empty(), + TupleDomain.all(), + Optional.empty()); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(2); @@ -229,7 +242,13 @@ public void testReadAddEntries() throws Exception { URI checkpointUri = getResource(TEST_CHECKPOINT).toURI(); - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(ADD), Optional.of(readMetadataEntry(checkpointUri)), Optional.of(readProtocolEntry(checkpointUri)), TupleDomain.all()); + 
CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry(checkpointUri)), + Optional.of(readProtocolEntry(checkpointUri)), + TupleDomain.all(), + Optional.of(alwaysTrue())); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(9); @@ -300,7 +319,8 @@ public void testReadAddEntriesPartitionPruning() ImmutableSet.of(ADD), Optional.of(readMetadataEntry(checkpointUri)), Optional.of(readProtocolEntry(checkpointUri)), - TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(BIGINT, 10L), stringPartField, singleValue(VARCHAR, utf8Slice("part1"))))); + TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(BIGINT, 10L), stringPartField, singleValue(VARCHAR, utf8Slice("part1")))), + Optional.of(alwaysTrue())); List partitionsEntries = ImmutableList.copyOf(partitionsEntryIterator); assertThat(partitionsEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(5); @@ -315,7 +335,8 @@ public void testReadAddEntriesPartitionPruning() ImmutableSet.of(ADD), Optional.of(readMetadataEntry(checkpointUri)), Optional.of(readProtocolEntry(checkpointUri)), - TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(BIGINT, 10L)))); + TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(BIGINT, 10L))), + Optional.of(alwaysTrue())); List partitionEntries = ImmutableList.copyOf(partitionEntryIterator); assertThat(partitionEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(5); @@ -332,7 +353,8 @@ public void testReadAddEntriesPartitionPruning() Optional.of(readProtocolEntry(checkpointUri)), TupleDomain.withColumnDomains(ImmutableMap.of( intPartField, singleValue(BIGINT, 10L), - stringPartField, singleValue(VARCHAR, utf8Slice("unmatched partition condition"))))); + stringPartField, singleValue(VARCHAR, utf8Slice("unmatched partition condition")))), + Optional.of(alwaysTrue())); 
assertThat(ImmutableList.copyOf(emptyIterator)).isEmpty(); // Verify IS NULL condition @@ -343,7 +365,8 @@ intPartField, singleValue(BIGINT, 10L), Optional.of(readProtocolEntry(checkpointUri)), TupleDomain.withColumnDomains(ImmutableMap.of( intPartField, onlyNull(BIGINT), - stringPartField, onlyNull(VARCHAR)))); + stringPartField, onlyNull(VARCHAR))), + Optional.of(alwaysTrue())); assertThat(ImmutableList.copyOf(isNullIterator)) .hasSize(1) .extracting(entry -> entry.getAdd().getPath()) @@ -357,7 +380,8 @@ intPartField, onlyNull(BIGINT), Optional.of(readProtocolEntry(checkpointUri)), TupleDomain.withColumnDomains(ImmutableMap.of( intPartField, notNull(BIGINT), - stringPartField, notNull(VARCHAR)))); + stringPartField, notNull(VARCHAR))), + Optional.of(alwaysTrue())); assertThat(ImmutableList.copyOf(isNotNullIterator)) .hasSize(2) .extracting(entry -> entry.getAdd().getPath()) @@ -402,13 +426,133 @@ private void assertPartitionValuesParsedCondition(URI checkpointUri, String colu ImmutableSet.of(ADD), Optional.of(readMetadataEntry(checkpointUri)), Optional.of(readProtocolEntry(checkpointUri)), - TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(type, value)))); + TupleDomain.withColumnDomains(ImmutableMap.of(intPartField, singleValue(type, value))), + Optional.of(alwaysTrue())); List partitionEntries = ImmutableList.copyOf(partitionEntryIterator); assertThat(partitionEntryIterator.getCompletedPositions().orElseThrow()).isEqualTo(5); assertThat(partitionEntries).hasSize(1); } + @Test + public void testReadAddEntriesStatsProjection() + throws Exception + { + int countIntegerColumns = 20; + int countStringColumns = 20; + MetadataEntry metadataEntry = new MetadataEntry( + "metadataId", + "metadataName", + "metadataDescription", + new MetadataEntry.Format( + "metadataFormatProvider", + ImmutableMap.of()), + "{\"type\":\"struct\",\"fields\":" + + "[" + + IntStream.rangeClosed(1, countIntegerColumns) + .boxed() + 
.map("{\"name\":\"intcol%s\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}}"::formatted) + .collect(Collectors.joining(",", "", ",")) + + IntStream.rangeClosed(1, countStringColumns) + .boxed() + .map("{\"name\":\"stringcol%s\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}"::formatted) + .collect(Collectors.joining(",", "", ",")) + + "{\"name\":\"part_key\",\"type\":\"timestamp\",\"nullable\":true,\"metadata\":{}}]}", + ImmutableList.of("part_key"), + ImmutableMap.of("delta.checkpoint.writeStatsAsJson", "false", "delta.checkpoint.writeStatsAsStruct", "true"), + 1000); + ProtocolEntry protocolEntry = new ProtocolEntry(10, 20, Optional.empty(), Optional.empty()); + + int countAddEntries = 30; + Set addFileEntries = IntStream.rangeClosed(1, countAddEntries).mapToObj(fileIndex -> new AddFileEntry( + "addFilePathParquetStats" + fileIndex, + ImmutableMap.of(), + 1000, + 1001, + true, + Optional.empty(), + Optional.of(createDeltaLakeParquetFileStatistics(countIntegerColumns, countStringColumns)), + ImmutableMap.of(), + Optional.empty())) + .collect(toImmutableSet()); + + CheckpointEntries entries = new CheckpointEntries( + metadataEntry, + protocolEntry, + ImmutableSet.of(), + addFileEntries, + ImmutableSet.of()); + + CheckpointWriter writer = new CheckpointWriter( + TESTING_TYPE_MANAGER, + checkpointSchemaManager, + "test", + ParquetWriterOptions.builder().build()); + + File targetFile = File.createTempFile("testAddStatsProjection-", ".checkpoint.parquet"); + targetFile.deleteOnExit(); + + String targetPath = "file://" + targetFile.getAbsolutePath(); + targetFile.delete(); // file must not exist when writer is called + writer.write(entries, createOutputFile(targetPath)); + + MetadataEntry readMetadataEntry = readMetadataEntry(URI.create(targetPath)); + ProtocolEntry readProtocolEntry = readProtocolEntry(URI.create(targetPath)); + CheckpointEntryIterator checkpointEntryWithNameColumnStatsIterator = createCheckpointEntryIterator( + URI.create(targetPath), 
+ ImmutableSet.of(ADD), + Optional.of(readMetadataEntry), + Optional.of(readProtocolEntry), + TupleDomain.all(), + Optional.of(columnName -> ImmutableList.of("intcol1", "stringcol1").contains(columnName))); + List checkpointEntryWithNameColumnStatsEntries = ImmutableList.copyOf(checkpointEntryWithNameColumnStatsIterator); + assertThat(checkpointEntryWithNameColumnStatsEntries).hasSize(countAddEntries); + CheckpointEntryIterator checkpointEntryWithAllColumnStatsIterator = createCheckpointEntryIterator( + URI.create(targetPath), + ImmutableSet.of(ADD), + Optional.of(readMetadataEntry), + Optional.of(readProtocolEntry), + TupleDomain.all(), + Optional.of(alwaysTrue())); + List checkpointEntryWithAllStatsEntries = ImmutableList.copyOf(checkpointEntryWithAllColumnStatsIterator); + assertThat(checkpointEntryWithAllStatsEntries).hasSize(countAddEntries); + assertThat(checkpointEntryWithNameColumnStatsIterator.getCompletedBytes()).isLessThan(checkpointEntryWithAllColumnStatsIterator.getCompletedBytes() * 9 / 10); + } + + private static DeltaLakeParquetFileStatistics createDeltaLakeParquetFileStatistics(int countIntegerColumns, int countStringColumns) + { + Random random = new Random(); + Map minValues = ImmutableMap.builder() + .putAll(IntStream.rangeClosed(1, countIntegerColumns) + .boxed() + .collect(toImmutableMap("intcol%s"::formatted, columnIndex -> random.nextLong(0, 1000)))) + .putAll(IntStream.rangeClosed(1, countStringColumns) + .boxed() + .collect(toImmutableMap("stringcol%s"::formatted, columnIndex -> "A".repeat(random.nextInt(0, 10)) + UUID.randomUUID()))) + .buildOrThrow(); + Map maxValues = ImmutableMap.builder() + .putAll(IntStream.rangeClosed(1, countIntegerColumns) + .boxed() + .collect(toImmutableMap("intcol%s"::formatted, columnIndex -> 1000L + random.nextLong(0, 1000)))) + .putAll(IntStream.rangeClosed(1, countStringColumns) + .boxed() + .collect(toImmutableMap("stringcol%s"::formatted, columnIndex -> "Z".repeat(random.nextInt(0, 10)) + 
UUID.randomUUID()))) + .buildOrThrow(); + Map nullCount = ImmutableMap.builder() + .putAll(IntStream.rangeClosed(1, countIntegerColumns) + .boxed() + .collect(toImmutableMap("intcol%s"::formatted, columnIndex -> random.nextLong(0, 1000)))) + .putAll(IntStream.rangeClosed(1, countStringColumns) + .boxed() + .collect(toImmutableMap("stringcol%s"::formatted, columnIndex -> random.nextLong(0, 1000)))) + .buildOrThrow(); + return new DeltaLakeParquetFileStatistics( + Optional.of(1000L), + Optional.of(minValues), + Optional.of(maxValues), + Optional.of(nullCount)); + } + @Test public void testReadAllEntries() throws Exception @@ -420,7 +564,8 @@ public void testReadAllEntries() ImmutableSet.of(METADATA, PROTOCOL, TRANSACTION, ADD, REMOVE, COMMIT), Optional.of(readMetadataEntry(checkpointUri)), Optional.of(readProtocolEntry(checkpointUri)), - TupleDomain.all()); + TupleDomain.all(), + Optional.of(alwaysTrue())); List entries = ImmutableList.copyOf(checkpointEntryIterator); assertThat(entries).hasSize(17); @@ -542,17 +687,18 @@ public void testSkipRemoveEntries() writer.write(entries, createOutputFile(targetPath)); CheckpointEntryIterator metadataAndProtocolEntryIterator = - createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(METADATA, PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); + createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(METADATA, PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all(), Optional.empty()); CheckpointEntryIterator addEntryIterator = createCheckpointEntryIterator( URI.create(targetPath), ImmutableSet.of(ADD), Optional.of(metadataEntry), Optional.of(protocolEntry), - TupleDomain.all()); + TupleDomain.all(), + Optional.of(alwaysTrue())); CheckpointEntryIterator removeEntryIterator = - createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(REMOVE), Optional.empty(), Optional.empty(), TupleDomain.all()); + createCheckpointEntryIterator(URI.create(targetPath), 
ImmutableSet.of(REMOVE), Optional.empty(), Optional.empty(), TupleDomain.all(), Optional.empty()); CheckpointEntryIterator txnEntryIterator = - createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(TRANSACTION), Optional.empty(), Optional.empty(), TupleDomain.all()); + createCheckpointEntryIterator(URI.create(targetPath), ImmutableSet.of(TRANSACTION), Optional.empty(), Optional.empty(), TupleDomain.all(), Optional.empty()); assertThat(Iterators.size(metadataAndProtocolEntryIterator)).isEqualTo(2); assertThat(Iterators.size(addEntryIterator)).isEqualTo(1); @@ -568,14 +714,26 @@ public void testSkipRemoveEntries() private MetadataEntry readMetadataEntry(URI checkpointUri) throws IOException { - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(METADATA), Optional.empty(), Optional.empty(), TupleDomain.all()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(METADATA), + Optional.empty(), + Optional.empty(), + TupleDomain.all(), + Optional.empty()); return Iterators.getOnlyElement(checkpointEntryIterator).getMetaData(); } private ProtocolEntry readProtocolEntry(URI checkpointUri) throws IOException { - CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator(checkpointUri, ImmutableSet.of(PROTOCOL), Optional.empty(), Optional.empty(), TupleDomain.all()); + CheckpointEntryIterator checkpointEntryIterator = createCheckpointEntryIterator( + checkpointUri, + ImmutableSet.of(PROTOCOL), + Optional.empty(), + Optional.empty(), + TupleDomain.all(), + Optional.empty()); return Iterators.getOnlyElement(checkpointEntryIterator).getProtocol(); } @@ -584,7 +742,8 @@ private CheckpointEntryIterator createCheckpointEntryIterator( Set entryTypes, Optional metadataEntry, Optional protocolEntry, - TupleDomain partitionConstraint) + TupleDomain partitionConstraint, + Optional> addStatsMinMaxColumnFilter) throws 
IOException { TrinoFileSystem fileSystem = new HdfsFileSystemFactory(HDFS_ENVIRONMENT, HDFS_FILE_SYSTEM_STATS).create(SESSION); @@ -600,10 +759,14 @@ private CheckpointEntryIterator createCheckpointEntryIterator( metadataEntry, protocolEntry, new FileFormatDataSourceStats(), - new ParquetReaderConfig().toParquetReaderOptions(), + new ParquetReaderConfig() + .setMaxBufferSize(DataSize.ofBytes(500)) + .setSmallFileThreshold(DataSize.of(1, KILOBYTE)) + .toParquetReaderOptions(), true, new DeltaLakeConfig().getDomainCompactionThreshold(), - partitionConstraint); + partitionConstraint, + addStatsMinMaxColumnFilter); } private static TrinoOutputFile createOutputFile(String path) diff --git a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java index 31aec13a79c3..4a2354fb137b 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/checkpoint/TestCheckpointWriter.java @@ -50,6 +50,7 @@ import java.util.Map; import java.util.Optional; +import static com.google.common.base.Predicates.alwaysTrue; import static com.google.common.collect.ImmutableList.toImmutableList; import static com.google.common.collect.ImmutableSet.toImmutableSet; import static com.google.common.collect.Iterables.getOnlyElement; @@ -481,7 +482,8 @@ private CheckpointEntries readCheckpoint(String checkpointPath, MetadataEntry me new ParquetReaderConfig().toParquetReaderOptions(), rowStatisticsEnabled, new DeltaLakeConfig().getDomainCompactionThreshold(), - TupleDomain.all()); + TupleDomain.all(), + Optional.of(alwaysTrue())); CheckpointBuilder checkpointBuilder = new CheckpointBuilder(); while (checkpointEntryIterator.hasNext()) { diff --git 
a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java index 5e785a542f4c..8159b9b9fce2 100644 --- a/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java +++ b/plugin/trino-delta-lake/src/test/java/io/trino/plugin/deltalake/transactionlog/statistics/TestDeltaLakeFileStatistics.java @@ -46,6 +46,7 @@ import java.util.Optional; import java.util.OptionalInt; +import static com.google.common.base.Predicates.alwaysTrue; import static com.google.common.collect.Iterators.getOnlyElement; import static io.airlift.slice.Slices.utf8Slice; import static io.trino.plugin.deltalake.DeltaLakeColumnType.REGULAR; @@ -106,7 +107,8 @@ public void testParseParquetStatistics() new ParquetReaderConfig().toParquetReaderOptions(), true, new DeltaLakeConfig().getDomainCompactionThreshold(), - TupleDomain.all()); + TupleDomain.all(), + Optional.empty()); MetadataEntry metadataEntry = getOnlyElement(metadataEntryIterator).getMetaData(); CheckpointEntryIterator protocolEntryIterator = new CheckpointEntryIterator( checkpointFile, @@ -121,7 +123,8 @@ public void testParseParquetStatistics() new ParquetReaderConfig().toParquetReaderOptions(), true, new DeltaLakeConfig().getDomainCompactionThreshold(), - TupleDomain.all()); + TupleDomain.all(), + Optional.empty()); ProtocolEntry protocolEntry = getOnlyElement(protocolEntryIterator).getProtocol(); CheckpointEntryIterator checkpointEntryIterator = new CheckpointEntryIterator( @@ -137,7 +140,8 @@ public void testParseParquetStatistics() new ParquetReaderConfig().toParquetReaderOptions(), true, new DeltaLakeConfig().getDomainCompactionThreshold(), - TupleDomain.all()); + TupleDomain.all(), + Optional.of(alwaysTrue())); DeltaLakeTransactionLogEntry matchingAddFileEntry = null; while 
(checkpointEntryIterator.hasNext()) { DeltaLakeTransactionLogEntry entry = checkpointEntryIterator.next(); diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/README.txt b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/README.txt new file mode 100644 index 000000000000..c87762ee3782 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/README.txt @@ -0,0 +1,13 @@ +Data generated using Databricks 13.3 LTS + +``` +CREATE TABLE default.parsed_stats_case_sensitive (part INT, a_NuMbEr INT, a_StRiNg STRING) +USING DELTA +PARTITIONED BY (part) +LOCATION 's3://.../parsed_stats_case_sensitive' +TBLPROPERTIES (delta.checkpointInterval = 2); + +insert into default.parsed_stats_case_sensitive VALUES (100, 1,'ala'), (200, 2, 'kota'); +insert into default.parsed_stats_case_sensitive VALUES (300, 3, 'osla'); +insert into default.parsed_stats_case_sensitive VALUES (400, 4, 'zulu'); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..b39a26449787 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1700647167062,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"CREATE 
TABLE","operationParameters":{"partitionBy":"[\"part\"]","description":null,"isManaged":"false","properties":"{\"delta.checkpointInterval\":\"2\"}","statsOnLoad":false},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"756c1d88-796f-4bb1-8076-3128c860cc51"}} +{"metaData":{"id":"480a0934-0451-4c37-b4bf-ce84c878e145","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"part\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a_NuMbEr\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"a_StRiNg\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["part"],"configuration":{"delta.checkpointInterval":"2"},"createdTime":1700647166713}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..9417e7c1643e --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ 
+{"commitInfo":{"timestamp":1700647172873,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"2","numOutputBytes":"1671"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"24dc16da-5d97-48e8-a9dd-e5700cf49f7b"}} +{"add":{"path":"part=100/part-00000-c707739f-45c0-4722-9837-4b11599ea66d.c000.snappy.parquet","partitionValues":{"part":"100"},"size":832,"modificationTime":1700647173000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a_NuMbEr\":1,\"a_StRiNg\":\"ala\"},\"maxValues\":{\"a_NuMbEr\":1,\"a_StRiNg\":\"ala\"},\"nullCount\":{\"a_NuMbEr\":0,\"a_StRiNg\":0}}","tags":{"INSERTION_TIME":"1700647173000000","MIN_INSERTION_TIME":"1700647173000000","MAX_INSERTION_TIME":"1700647173000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part=200/part-00000-9cfa2c9c-efae-4e0a-bf25-eba86bfdce11.c000.snappy.parquet","partitionValues":{"part":"200"},"size":839,"modificationTime":1700647173000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a_NuMbEr\":2,\"a_StRiNg\":\"kota\"},\"maxValues\":{\"a_NuMbEr\":2,\"a_StRiNg\":\"kota\"},\"nullCount\":{\"a_NuMbEr\":0,\"a_StRiNg\":0}}","tags":{"INSERTION_TIME":"1700647173000001","MIN_INSERTION_TIME":"1700647173000001","MAX_INSERTION_TIME":"1700647173000001","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000002.checkpoint.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 
0000000000000000000000000000000000000000..4091b2c9990462f7980adffb75b9c85e923a239d GIT binary patch literal 20131 zcmeHP4RBP~b$Y;*h32~WLNnN6GsOv&XG`NXCMUthp4)z+q??<_sU)d9f1Mqj~3ms ziM~F+%PZD5xtjbQzbnw#STELk+;xFKUCo+qj#xK=X zd&?oS$&#z!eDf!_4_Fz_?(JJ|{eg+&TA!Cc6`U8(zJ4_?J&qNcpgMm3#y1b=syzW# zHoL+K^)+fDze`U2>w!0IEyRWruMZT1ARS%nR`v9DJkhnOw`1+9c~G;qqifR|p0lpl zu$TUYO`Z&>pH zy!MoFLIW5a`~XKc@sJ)F)TMt)r72%;!s4X%SjyG)J ztnTUs8;aaJb|j9Ca8^(Cf|er3=14sJ$N13Y;y&EZt=Z3Ub)Fz+jl`l$P3v9U%lmPx zoa7<$`q}?Fs2{7(wNpWFS^^;GdH7om^8#!Mjs5+f9JpT0=04QZ9{`$1c=Faie(O#B zWXZ2kfat8R0@HmV@x4E#-a#>ovQBg#7OsOsDA@y0Py8rpB`3%J@JEF@4K8X@8zU@e z!!)-VdMRRUBo-1uihIvPR>NPu^w*bRMk_85_|qQ{0DaGMFW;CfNb(t~+@Ocq(%<_< zY^FtWKKS{|-~N}k&FavA_nza{d3id3cJS2`-~R5nzE!bT@$sN797537Ow4}W-#-oR z`-MNb_KH59SoSKNRyM8@bm(nzi|*z+ugmSNuXTC-5I6X|zCM>bb2e3ciu`WM-i6?a;Py02g_0gnH<~mxXwJ1 z23}zHF>|Xc7IdYjhTq$pufo9!`!my9iUzI&=k1R8wm}K}V=NvW^2Z(3j=rE2Lbv)` z;AO=De1$=mqq=m-p0 zgG@63@VWM6e&|&_&=w=JuYc#jxJi`_v^cj(QR4sjSjfYZ`Q-B1-KVMhQii#(incRT z7Y$h0=7nE=_h%3euppYXLH5uFZD9CsPW)oZ*^etq#?9`j6DTh=l?xpF5_36Uz|Hh% zE*DyH1ZfD51s4sIvI^o=2#9Hy=^!!+!{vQI1ckMNupo{VA(RhYwFTnma>0Uev)0zu ziem{s<${ex%(~ZV6b!FfAgsMM0hx*FNnu@_9LJUmWod&}4WUFAc~ZTV3;8VcROQTS zJU+(8IJB9?Wi(<-8<}MRQCAId9t-X;fT7WyPgMgBZGaAyl?Gs%7Hw+W1;eBk^;B2~ z#5mNtY70b6P)j2v4a=HMWAGaC4=`4XIu5KZ&FhbGoc?b@!EjY(vKVLB4ApAO#2e+o z^v7&KxuBLZCIxHHbm>$ZCa<5=+GhnZpbDGDkzx0tT9|4LoaU7d)#L%E<7(9smQZMXUR1s0= zV${GB(T;E^7?xIUQD2$iRc@rOFDAvoAHhQ*cBLcy*tWP7Q}pT=qk&*p3rapewv#I5xp^ zObQUQED{YtQENq%PJL^WSrurhE^L#}Qe8NF*dv>o?~)Y+k8)U~@z00;D29+Z4a<-r zA2b3TALUWzgHq*2rGdy6iOfqiM@n0&4klU*^m1S@)oiF8l}TIU66ig!IvA4HDw|5= zaBoZ+fIppf27=pg&%F|+9VwBuZVocvQ=8pykaZ+iu~emufdnF+0hmW4Gn~mPi|+6?5(V!U@VRk z1v`)j9$`QV$2HnH{z$kl*gq7d?r0VchgxZ&KPsVjrEU#147!L%{E-lxbkV6+0+X*t z5fek8A-AAbqg??OG+YgcLs2Q4P8B~(vF?U45E&4IVRhc9D@(3it*DKv zGFfrz8}#IP@>C9Rd_lRreDV<|WGafv>vJKVtit6=UZ#kdFW_7FR;R6{0k-{iNJoH* zAZ7E~8dAoeaFd0cvmK6DbcE_^#z@-@q?eF%jFRq#REwTgzxFZu-6jLTPuCEhf1d)J 
z2LL8DER=MCq?CuO{8@|hJkVOJI??nVx%)&y(t)BNqv7Es!rnz@Lw`J}*XiUMBJlfa z$w4#cJO^hDMdgcgt>x_&=$FwgYO6c0>a@cU!cOCIH% z<6zKwZYdFsj`O(9`LYc-tLYrUrsjB%b82lG&R3B0S<3ksBpW$(MnFGxK@;WqDF2xA zSbhs+iQpF)&`-V2CJ<}5o;Wz?J~-Ra*#Ql>QU>47kDB?NW@zl3$hX5tq|Go1hD21F zP7HSf<0JLN$2mWM4DHm6+4$S}{bv4M6Et^D6e4RaQzfM2vC?pS2|4a+fKz(s7^Lr& zx!i>2VOX*C5g}D0x}~u>2H$WT@vbzWUmAgv^J(x5ktBm0DK9DPq-kPXz$CXG|5e2Q@L^KTIWOEdZD&-APVLNv`!ejz z50Tli7VOLsVcO2D{AM3}lWB91UXZ1HU`EJvG1uq=1}Tpt#qnS9$?v$IdoRP`?CRu8z{!z<|)^UM;}TeP!DopR1V&lc1(1v<+&@=rU@EP?$B zCUBK3sVzB6s%5(Ot^+4>dNuA>4n360E;+-V#^dcY@xvw{ZJGLx#?)`XfTz6KF*TK5 zT}Re|y53t&zQQ>VFU?$hoomHy`QfFIuJph!2t+^iEKZ_^CLYIht^6k(Zh8*|&%6=P$ zTzk^9co$h*Y{wx_xTau{bP&W?BwWA(Z@{S&`vBf2z3R~hSy+EZvmQ`Z8r#|o^dgpTSlA-#5sgG{N4t> zz-i-;yWwj^ZmZyP7rQyPV&e$l_J zqpP>M0d6J>LP`VWu7{9QSL3az<%A_EEn;5|sb5~JiAJ8SQSV#}f44M~kOa=|n2*k+ zJ-v>?DDZwb9NW$mJyf$x<4S)tfKVhGa@}Q@>>!?kFZo~wCzB8<5MbDuXaujAFoLWV z!j8F>JgXYoG0p?xAfUss9o}t`Z{*7wr)O$uE88OBI2`!Kx}d}IRs76*kB4kpZ?YR5 z<0YWG#nEmJX$ZbZBqWJws~HX>-Zf(6zbb927hKlU+e)e8!1S5w>lL~iO^og?tV@6W z>F#mlyHXl`*1??ae$^~cbV(8oB}*ICE^%cCk%b3g*TS;npn1tD?vdRs96hhnh_m(C z1{bB=L}PetZcSs%2*C&(8Cp*k8rNTuV|+3ZOuI(M$Q)T36#J5CitC<6A-~eCe8xVM z-4JBT$wa50j=r(;oLTlQJ8NM_tg5kOWNkCfgxNRHoR56dD=V$cE(B5+Np=gA@yq|7sU}d?VTi$bG9)&UK1K1JI#8cQGumx&HlUfVYMQxz7R@MG>`Ywfj2=$Y zt9k>P4Go)X(=5nAZPS-^luc8n%%Nu+Ug_>%Gl{bnLB6id=rifDdZn8YWzAH%0nPvz z*H6T!WnmHeE9Okvth>uxxpE_&A+dgo(Wtl$s+Ak*#?3igOTX)VdD4~kRL-A>#jOTiwn2pYI;l?b qd7AwmcWqyDb6+3)-)N1EwUV?1zU2YmIs7#KavPgDt{eV)y8i{dl%F&J literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..e8ea7c46663b --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ 
+{"commitInfo":{"timestamp":1700647175783,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"839"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"81f1dc19-e075-4b65-8a4c-78e592a200d1"}} +{"add":{"path":"part=300/part-00000-7b5e9086-abfc-4a58-8c2c-d775a1203dd3.c000.snappy.parquet","partitionValues":{"part":"300"},"size":839,"modificationTime":1700647176000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a_NuMbEr\":3,\"a_StRiNg\":\"osla\"},\"maxValues\":{\"a_NuMbEr\":3,\"a_StRiNg\":\"osla\"},\"nullCount\":{\"a_NuMbEr\":0,\"a_StRiNg\":0}}","tags":{"INSERTION_TIME":"1700647176000000","MIN_INSERTION_TIME":"1700647176000000","MAX_INSERTION_TIME":"1700647176000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..8253647a5984 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ 
+{"commitInfo":{"timestamp":1700647181681,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"839"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"4400b746-d6d0-44c4-badf-803a8e3e815e"}} +{"add":{"path":"part=400/part-00000-e90e48ef-2b52-4d32-be2f-9051e90920ce.c000.snappy.parquet","partitionValues":{"part":"400"},"size":839,"modificationTime":1700647182000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"a_NuMbEr\":4,\"a_StRiNg\":\"zulu\"},\"maxValues\":{\"a_NuMbEr\":4,\"a_StRiNg\":\"zulu\"},\"nullCount\":{\"a_NuMbEr\":0,\"a_StRiNg\":0}}","tags":{"INSERTION_TIME":"1700647182000000","MIN_INSERTION_TIME":"1700647182000000","MAX_INSERTION_TIME":"1700647182000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..04273e03e014 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/_delta_log/_last_checkpoint @@ -0,0 +1 @@ 
+{"version":2,"size":5,"sizeInBytes":20131,"numOfAddFiles":3,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues_parsed","type":{"type":"struct","fields":[{"name":"part","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"stats_parsed","type":{"type":"struct","fields":[{"name":"numRecords","type":"long","nullable":true,"metadata":{}},{"name":"minValues","type":{"type":"
struct","fields":[{"name":"a_NuMbEr","type":"integer","nullable":true,"metadata":{}},{"name":"a_StRiNg","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"maxValues","type":{"type":"struct","fields":[{"name":"a_NuMbEr","type":"integer","nullable":true,"metadata":{}},{"name":"a_StRiNg","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"nullCount","type":{"type":"struct","fields":[{"name":"a_NuMbEr","type":"long","nullable":true,"metadata":{}},{"name":"a_StRiNg","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,
"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nullable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"f454180a2c72b7503ff84cf161c8fb9e"} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=100/part-00000-c707739f-45c0-4722-9837-4b11599ea66d.c000.snappy.parquet 
b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=100/part-00000-c707739f-45c0-4722-9837-4b11599ea66d.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..79484e2495300c9fdb0db1f29d4cf62f1c8956f6 GIT binary patch literal 832 zcmah|O>fgc5M4W^wICdjva2<+AhKxGB4>${w8c>_e6%W3u&NR+1yrq_O={I~QhNic zqFh1Z$O&=djKqyICnSCkXM`BrDUFl^m+{Wbn>X{uy9W;rZH#aUQ~cxI*FV<`tb)6W zHH5C-Q4vA~MF9)3oA?&~{_Xe2;sWG)Er%2U!b7g#P<6VJ;^!Z%>I9rLTuAeood*q~ z;XTMA-5@URq$kL<_lwDx$EU}MeK9h%mJ6J~@0}6?% z8@g4|U^IPeRfG2b3b0C4@{sIRw01sPZVB4R2F*Q~0TevMXt9haQ#&9T}0k1M6GD#gHi&;FR4C?2aJz7sAQR7CWE0k z8byMx?MnaN^X?-=jU`Yh2HZb?;&z>FiV&qsfY%~Uf>A`TG+2|_h;A++lCzM(hR-HK zvUv1{4Ta=AE;;?IOig({7G}##WIXYuX_@^%guTRUy_lL2AEv2`9|6fZIEbVeh!_IV zBn-J10u1VfWnNI5t?6uLEl}jEyrh7BK{WUu!>bvhx0m$fD|yVN=)wlmtsKqeRflnT zzK^oa>^f^yQ=wiQ_@_y=R;$&^8{Q-cd*yobev>;}Jx|!%9O4bp5MJH!>ovRYIQ_mO X_|A?k#HMb+bxUvD7q}`1@OJ(J7$3=O literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=200/part-00000-9cfa2c9c-efae-4e0a-bf25-eba86bfdce11.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=200/part-00000-9cfa2c9c-efae-4e0a-bf25-eba86bfdce11.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..35e6edf6cc93bd433b7f4d6d4637a3fdce60adfc GIT binary patch literal 839 zcmah|&x_MQ6rRLdqZC2t3=;@gf?IZ>9hOs}_Cn>mPXtufNGh{2T4rb4o0 z{DF;xgs1eazPGbDFz||Af84M z_amS|y{Oq&(spMypVw9-@^LOD;Jh#%{7>QS9MQXLe)5q#;Zk&Af@&(q3w!lZQd!KS z;xMPlT6JaAPr_iB)*FpRv$E+=!>CtjwjZ>)yWR7Jv%>*ziI(u2ZqRHveb?>xUBP#E U9U->N8eBPb>wds>IfN(l7m%yW$N&HU literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=300/part-00000-7b5e9086-abfc-4a58-8c2c-d775a1203dd3.c000.snappy.parquet 
b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_case_sensitive/part=300/part-00000-7b5e9086-abfc-4a58-8c2c-d775a1203dd3.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a09b84c57fed74e458a77c8a241f5884c040245f GIT binary patch literal 839 zcmah|&2G~`5MH}!u~3ABlwGYQOIQ|-TFF`BByDlj3qP#@fmM}oDWGcYY=TwCN$oYL zit+|Mazvn>xFGQWya9JEJPAj}c1j}Uz-8An^UXK&jc1P@96K1{8m9Q$?=M%1fmJX! zu!hjhyDCCxO<98zdTUp7~s1Z@?qmKO8?BB2ye{ZVj@d{Qt&bfgc5M7&4tSTTOWmjv-5|%}yR!WvQNn0HC!bdAWU{xhtN>R0THpZ&sr1lzA zqMW$)$dOCM58xkgMdHq(|D-n<$0>=F1D9RT%$qm!#A&DVL%P6U?V_IFnZ&B<5@$ zrp@doYt?h1UK08zX}!^CG|hD{3!|RdY(H#sXS3%Cdy9j=C0fF3I)1ZZ_Z_F-cLd+w UwuRWxYp`) +USING DELTA +PARTITIONED BY (part) +LOCATION 's3://?/parsed_stats_struct' +TBLPROPERTIES (delta.checkpointInterval = 2); + + +INSERT INTO default.parsed_stats_struct VALUES (100, 1, STRUCT(1,'ala')), (200, 2, STRUCT(2, 'kota')); +INSERT INTO default.parsed_stats_struct VALUES (300, 3, STRUCT(3, 'osla')); +INSERT INTO default.parsed_stats_struct VALUES (400, 4, STRUCT(4, 'zulu')); +``` diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000000.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000000.json new file mode 100644 index 000000000000..87703a34a689 --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000000.json @@ -0,0 +1,3 @@ +{"commitInfo":{"timestamp":1700645572508,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"CREATE 
TABLE","operationParameters":{"partitionBy":"[\"part\"]","description":null,"isManaged":"false","properties":"{\"delta.checkpointInterval\":\"2\"}","statsOnLoad":false},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"f65a01af-26a6-461a-9582-1e662891f5e3"}} +{"metaData":{"id":"5a789ae0-5f44-4922-aedd-a959411e05f4","format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"part\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"id\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"root\",\"type\":{\"type\":\"struct\",\"fields\":[{\"name\":\"entry_one\",\"type\":\"integer\",\"nullable\":true,\"metadata\":{}},{\"name\":\"entry_two\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]},\"nullable\":true,\"metadata\":{}}]}","partitionColumns":["part"],"configuration":{"delta.checkpointInterval":"2"},"createdTime":1700645572108}} +{"protocol":{"minReaderVersion":1,"minWriterVersion":2}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000001.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000001.json new file mode 100644 index 000000000000..d7fc9666087c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000001.json @@ -0,0 +1,3 @@ 
+{"commitInfo":{"timestamp":1700645578101,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","readVersion":0,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"2","numOutputRows":"2","numOutputBytes":"2329"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"ce3db1cf-1d23-4d68-8d05-8c20f22e0b88"}} +{"add":{"path":"part=100/part-00000-7d5755b3-20b1-4039-920b-d1f24022685e.c000.snappy.parquet","partitionValues":{"part":"100"},"size":1161,"modificationTime":1700645578000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":1,\"root\":{\"entry_one\":1,\"entry_two\":\"ala\"}},\"maxValues\":{\"id\":1,\"root\":{\"entry_one\":1,\"entry_two\":\"ala\"}},\"nullCount\":{\"id\":0,\"root\":{\"entry_one\":0,\"entry_two\":0}}}","tags":{"INSERTION_TIME":"1700645578000000","MIN_INSERTION_TIME":"1700645578000000","MAX_INSERTION_TIME":"1700645578000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} +{"add":{"path":"part=200/part-00000-22aeb477-f838-44b1-a897-4b0e7d009ae4.c000.snappy.parquet","partitionValues":{"part":"200"},"size":1168,"modificationTime":1700645578000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":2,\"root\":{\"entry_one\":2,\"entry_two\":\"kota\"}},\"maxValues\":{\"id\":2,\"root\":{\"entry_one\":2,\"entry_two\":\"kota\"}},\"nullCount\":{\"id\":0,\"root\":{\"entry_one\":0,\"entry_two\":0}}}","tags":{"INSERTION_TIME":"1700645578000001","MIN_INSERTION_TIME":"1700645578000001","MAX_INSERTION_TIME":"1700645578000001","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000002.checkpoint.parquet 
b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000002.checkpoint.parquet new file mode 100644 index 0000000000000000000000000000000000000000..20aa1e4ff52c73bfbb7acb1fe72b8aeecca333f3 GIT binary patch literal 21976 zcmeHP4RBLec7Bp&TQo;VviwKPPr{!~9mnf9gtAH4XX}}0ktIix z2?R)K%48ju$$D6Zux^IXOp@7BHc3-8WztT-}w&gd@dd8o74*m%quelR|jRBUX;f z7jig`ga0?+{ly%zGl!h{%i|wdbR4djnkkEY=4Rm3BSvELD?OgMrUHx0V={f}s`Xdb zR#$gaxxAi^N|(=5Rqv{Yq|0B~<@I^I-kQ2<$=&Jkc-+yD*weGo4G>TDN-@&Pd23p^ zPrY7I>hRUpy1MGB>Rdh_fQWVVwJu+WN2>LEJoTdFOGV#2bstv+0R8^@u1>GFN^<$S zqz;#_&R6HE_j{{dAW4n4tJ5P@`#kOytsJqC5>iTD{PoYz7d8+xv2dK#4!>xfKz%ca zgZ%!3V?Uymw}5){IBrUvnW~p+;8G5k1Lc;%zgK^9>IcQDlAzo@f?Up$o42Y7n}Y)# z;g?tbs`7Y%asm8%>-9sgfHP+)+I_*o<(eA$Q6TvINcje#%g8J;aPmZ-r2!rw zyQHCH_V@nqLlein?UTO&x|9fH$KWr9DKrWX(1j|rDaQguu=2nC9||%3U}1b z0TbCwJ~;dGht>vSMNTXI#UM&k%fn0C+M6C~S<~M1;L>T3^I%iUniV`}xqsC>o_nkm zavo}hcTKC8u4%u&ZTZsnH4j4?SXK}Z`HTPjdXt^l0iSA=Z%2-~2k*XBUh>J6FHc!@Oa)p^V482rui%QfD~gk2>a6Ryeu?p_O8= zSAwTaj(|V6v?vk|$4pboq);rfaZNZRZ0Rf9Bp@$bEH;d*&F& zRkh)-nq!rXz^52S)J1&EISa_H7C?EIU*)=!e9%{X3jxF!yv%W zkqIO#_;2_8-6@zy6a|B@^aIl3Oto#`{PjYtB%7&J^?`;np#S;RAO6ggOb<%B0rWTD z(55rwcY-ZC^~%40Y*zaLSeVHkoh71O%MO;|!Yi+TrY}|OQ|u6E8~Y3BI~}wC@9&== z9pvvn+WPAvRoP%U=N_a3-pb7a9af9Ab>M_}T-9AZpUYS8^}0mK?{|sy)%Cv0O34HH zik>eI{ncCgSyZtj7PTG5dOr)s@h8WBJD_(CrY)gz6mj_=hw3+RU`)%%^^@C|s}e)^ zNN9M0^^f$WuI7>>W^(xGTOaEEg6n2^`nPgV!GyNS5!=`!!Au^FMS449jtWOtKnkKM z_%2umvDKLVJ#pmh+{zr zOjJ(_>*7Rj+$NN!biFl% z5}o@@^=13mD-Z=!dJt|oIYX)b-r2%@UOkpcp85#kwD4cN4P zXe{`muA-(zD-LV?Kr^XHJr(8w6%O0<**W0FL1F5>X)G)^00T^y`~fl5q)r}|m;LJ- z*yw+pP|)_XGO_d-7DKIDGnv}*VJ^XNh)qya8I!`$8QU(S$;PP)Y_U=>(KyqgGa-m> zRoE1>B32)=h0zS^7_W4wCNIo@UR{P&HJ;Fk)uisVI*f2M>9f&H!l0wpGNZQ1rkmP% zD_O)N2Rq-$Qhg?t95Ru?9HqdKT+*r}cju9qlH8t8f=cp`l`K<|&livfl;nj%@|co5 zSwz04B;%7Aj}a{yFV=FkWPFO2t0m(lTCSFiPbE84fe)3ERSLTkge*~#yLqx&NnX;D zNADmZCHJ7f3hcD8O=<<2C{3gb7*NQ;sj zDPslVceCV(mW(^JTrC;DN6Xcc@mX4~mWlaOB$8J*Rh 
z2}9+2M`KYGri*oJqTu~@i=>FFHVZki4IzkxlA_sWAs6DeP5#+jvmFPQ1#(y6gc$F9K>WWG+n6Ths5>}-t^uWfL6jk)vDMtK(kQfZaHYzy~OKS^1-4yam8|0jV z4lycWmS29p!!LD-y}=k{Ee>~g2VyJLmZA!b#>7}u5do^g8=7McJVzxzG0P%RhZnV0 zMCsJGR+&|y797Mf1uQs-&4)EI2WWS&pbMtv6);y$)|&DRt36Gs0;r|ZJtm5z1|2DX z>@Y@|Fi$U&?ipmldgVo=?(lkvOiS+2bSz9&y94$)&{KC$a>t`Ru~ga+lc44N%K|~^ zL1j^l9B$5_0N`(CPX)oP7&BM`1DBM@T8Cy+2M(Q8Ao`Fn$R_!vXlEqQgTqqmNO@i1 zNViDa9;vK7k?{I}Uy5iY^23y5A~OOV{6+HI`7jMvXz(R$d@T+Kd%Hu(C$G6dp6@Kd zKs1I!4Q0!RIi(xMhDJNNGaTv)tnH1^32g#X7VV{zIwKO!&var$4TCOX;m&Xnc8us? zErH?NCW)XOHe=uz#_UHU0f>dD+1RfD3kog+#NLRMO{I#TrdZ>l^oP5}Ku8@os%^@X zvlX>bRVFGBffAd%X;A#E5qp@%GHa+Qwm{MHx`3M4P1ZnHgSuib1}Gmal}#M~9>4pfmD#i9wt=l3$j^F_4eu&oi(QBiK;{>%Wic zYAWk#RF+)o##7R5h5U{jXM8HoVMC_$!Pqe)sTzs0y58ND&l?+l z9GIgl;HdGV?cC0Xtd8t0au^qj6@*Eu#Ht>Othx+7Z^I?NJQfV!0* zWwM@s76_bs2$&WOz~M(r0lyTvDdxx*+kpMG`Dj{p^Q@kYI7cS1*H)UaET96c}$!U&+_(G8b3vzA8Hx-89f=dgWZ(wszT2B|c|AdR{LQo=LZA_(23;pqkodmf3OdjO5% zzzn%6xad=elS4Rkydl!S3)opobCk8U=l$EOSuiEf?}D_!j3IWd>P4M%X=z zsnZIu>bgj6LbfzQt&JFoFCy`~lsJAjYyUchI3p+eZi77A9s$UcvOH-aNA;tPr~`<4 zSr*j6=EqS4O>(6j1{vQ*wEiVjXKB$=d(%e9BMA8>g^b_Bnqd?HC#7_#?=eVu8X=F# zQl^JY=~EjaFC*l6S;|>VN_EJla;s+XH%Cd)7w0y@>Y)#f#5oJ+Pbw zD^+W-gRZsXvl}woTF$H#Z!OPgqdGG*z1t993I65aQu3yWa}GLF)XRd=RyG3-It^+* zhHAOI4As2t-W25e*epZYcCP{DETZ%;N0dPgrH(0AiRH2wE=6kNS3>CqwujCVpe=P@ zo#pB4d=SbcivK1&8;T>ITMgJjD9?9^fxfXM?F+FI+b2spQE}i ztsu|BzKlC_@pa}I!)e>yu%+RJ9}tLs>sf4F1sb~eC%C0?7FeiD9M&TGeORjhX1*0l zHfV?1v=rd8qi$NRp4)EZ*58aP^mpd6rA9g1+)dilr7V~5jZhq#9AkA#v<#YDd(yKw zfGj?0#wNcu8jFMtV#eYcvcQ9P@^7e(?n%$$FtWH#SseCg&7$s58+MzMyD+Xb4|@#0 z*vAOE|3UPb`YTg_CT>Kh2JNpjfL=tkDZreg zFnhfyTS}P3(Zta9dJRGiAlkMTtmzp*QzH@vq13%^hI7V<^ETqVia7jN4Sb=~%I~g% zOP;pJ1Y0<=)-Cpka6ZgUkBYftaMootT=x_5m&Q8hKGMFJ6YNQag3?;Cb7NCWdwmVu zun>f#0?H{eA-Bryb60Z0oTL)br-JImVRt0_v|GJPBK(&-6HgKL^qG&CNqKtEjZtLi z5mwO4Fz5y=_0a%=;q(rrMy@+XmmNe}pH$-kv2=!=iAM178Y9SBA?)~zvWKGj+Kh9z z*aPUWosXyHmQ)Vs>hhlILB-#Q_xO~MZokyr2P#{KmTt+Z-dfgcD&0LACUMpv^0gaApGl3?t2vA)Yo^Lg 
z9tOa;-jX>jtx?o^g#M}!lQ!$>vZ-9TS;UZ7y~RjG+z8pq%_3tk1+Ev3JC2fYH7S|% zEz!89P~FugCQC-fI*UeAgVBruQ}YtR=+ijX08TyB5mGt-)PMDb#o0r4;a7$A{Ul}9( zt6@^YKWgFXj+7~}@UlvGrPp2MLT|Vu66jnP#mi&fvht4JK+s?2g}>V=)>r#Gq{Gf20)z^1*)k|V+ZKWj5f!lg;`}`aI!*ws@xK{X^uKxpB2&TyZ literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000002.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000002.json new file mode 100644 index 000000000000..e92c4c7cb22a --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000002.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1700645581464,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","readVersion":1,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1167"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"c77efc08-5797-458e-8b55-11b4ff6d1325"}} +{"add":{"path":"part=300/part-00000-d9fc223e-4feb-4848-9d25-03962fc0e540.c000.snappy.parquet","partitionValues":{"part":"300"},"size":1167,"modificationTime":1700645582000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":3,\"root\":{\"entry_one\":3,\"entry_two\":\"osla\"}},\"maxValues\":{\"id\":3,\"root\":{\"entry_one\":3,\"entry_two\":\"osla\"}},\"nullCount\":{\"id\":0,\"root\":{\"entry_one\":0,\"entry_two\":0}}}","tags":{"INSERTION_TIME":"1700645582000000","MIN_INSERTION_TIME":"1700645582000000","MAX_INSERTION_TIME":"1700645582000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git 
a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000003.json b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000003.json new file mode 100644 index 000000000000..8051b7571f4c --- /dev/null +++ b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/00000000000000000003.json @@ -0,0 +1,2 @@ +{"commitInfo":{"timestamp":1700645587551,"userId":"615774135840106","userName":"marius.grama@starburstdata.com","operation":"WRITE","operationParameters":{"mode":"Append","statsOnLoad":false,"partitionBy":"[]"},"notebook":{"notebookId":"2900318529393697"},"clusterId":"0905-151610-v55wl6f5","readVersion":2,"isolationLevel":"WriteSerializable","isBlindAppend":true,"operationMetrics":{"numFiles":"1","numOutputRows":"1","numOutputBytes":"1168"},"tags":{"restoresDeletedRows":"false"},"engineInfo":"Databricks-Runtime/13.3.x-scala2.12","txnId":"71e67df6-676b-43b3-81f7-666b34d65aa1"}} +{"add":{"path":"part=400/part-00000-da80387a-f286-4848-9853-f82afd408710.c000.snappy.parquet","partitionValues":{"part":"400"},"size":1168,"modificationTime":1700645588000,"dataChange":true,"stats":"{\"numRecords\":1,\"minValues\":{\"id\":4,\"root\":{\"entry_one\":4,\"entry_two\":\"zulu\"}},\"maxValues\":{\"id\":4,\"root\":{\"entry_one\":4,\"entry_two\":\"zulu\"}},\"nullCount\":{\"id\":0,\"root\":{\"entry_one\":0,\"entry_two\":0}}}","tags":{"INSERTION_TIME":"1700645588000000","MIN_INSERTION_TIME":"1700645588000000","MAX_INSERTION_TIME":"1700645588000000","OPTIMIZE_TARGET_SIZE":"268435456"}}} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/_last_checkpoint b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/_last_checkpoint new file mode 100644 index 000000000000..0c989170ebd7 --- /dev/null +++ 
b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/_delta_log/_last_checkpoint @@ -0,0 +1 @@ +{"version":2,"size":5,"sizeInBytes":21976,"numOfAddFiles":3,"checkpointSchema":{"type":"struct","fields":[{"name":"txn","type":{"type":"struct","fields":[{"name":"appId","type":"string","nullable":true,"metadata":{}},{"name":"version","type":"long","nullable":true,"metadata":{}},{"name":"lastUpdated","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"add","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"modificationTime","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"tags","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}},{"name":"stats","type":"string","nullable":true,"metadata":{}},{"name":"partitionValues_parsed","type":{"type":"struct","fields":[{"name":"part","type":"integer","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"stats_parsed","type":{"type":
"struct","fields":[{"name":"numRecords","type":"long","nullable":true,"metadata":{}},{"name":"minValues","type":{"type":"struct","fields":[{"name":"id","type":"integer","nullable":true,"metadata":{}},{"name":"root","type":{"type":"struct","fields":[{"name":"entry_one","type":"integer","nullable":true,"metadata":{}},{"name":"entry_two","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"maxValues","type":{"type":"struct","fields":[{"name":"id","type":"integer","nullable":true,"metadata":{}},{"name":"root","type":{"type":"struct","fields":[{"name":"entry_one","type":"integer","nullable":true,"metadata":{}},{"name":"entry_two","type":"string","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"nullCount","type":{"type":"struct","fields":[{"name":"id","type":"long","nullable":true,"metadata":{}},{"name":"root","type":{"type":"struct","fields":[{"name":"entry_one","type":"long","nullable":true,"metadata":{}},{"name":"entry_two","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"remove","type":{"type":"struct","fields":[{"name":"path","type":"string","nullable":true,"metadata":{}},{"name":"deletionTimestamp","type":"long","nullable":true,"metadata":{}},{"name":"dataChange","type":"boolean","nullable":true,"metadata":{}},{"name":"extendedFileMetadata","type":"boolean","nullable":true,"metadata":{}},{"name":"partitionValues","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"size","type":"long","nullable":true,"metadata":{}},{"name":"deletionVector","type":{"type":"struct","fields":[{"name":"storageType","type":"string","nullable":true,"metadata":{}},{"name":"pathOrInlineDv","type":"string","nullable":true,"metadata":{}},{"name":"offset","type":
"integer","nullable":true,"metadata":{}},{"name":"sizeInBytes","type":"integer","nullable":true,"metadata":{}},{"name":"cardinality","type":"long","nullable":true,"metadata":{}},{"name":"maxRowIndex","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"baseRowId","type":"long","nullable":true,"metadata":{}},{"name":"defaultRowCommitVersion","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"metaData","type":{"type":"struct","fields":[{"name":"id","type":"string","nullable":true,"metadata":{}},{"name":"name","type":"string","nullable":true,"metadata":{}},{"name":"description","type":"string","nullable":true,"metadata":{}},{"name":"format","type":{"type":"struct","fields":[{"name":"provider","type":"string","nullable":true,"metadata":{}},{"name":"options","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"schemaString","type":"string","nullable":true,"metadata":{}},{"name":"partitionColumns","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"configuration","type":{"type":"map","keyType":"string","valueType":"string","valueContainsNull":true},"nullable":true,"metadata":{}},{"name":"createdTime","type":"long","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"protocol","type":{"type":"struct","fields":[{"name":"minReaderVersion","type":"integer","nullable":true,"metadata":{}},{"name":"minWriterVersion","type":"integer","nullable":true,"metadata":{}},{"name":"readerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}},{"name":"writerFeatures","type":{"type":"array","elementType":"string","containsNull":true},"nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}},{"name":"domainMetadata","type":{"type":"struct","fields":[{"name":"domain","type":"string","nul
lable":true,"metadata":{}},{"name":"configuration","type":"string","nullable":true,"metadata":{}},{"name":"removed","type":"boolean","nullable":true,"metadata":{}}]},"nullable":true,"metadata":{}}]},"checksum":"94cfd4172a07fe6113ed85a12cb38d74"} diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=100/part-00000-7d5755b3-20b1-4039-920b-d1f24022685e.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=100/part-00000-7d5755b3-20b1-4039-920b-d1f24022685e.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a081afcdf18b51d5e02a5524922922b843340ea8 GIT binary patch literal 1161 zcmb7EL2DC16rP>L8rvcWonclM42un^v}-m=TN83oYpEikNWIjSGD#+J>1MaxomfK1 zQS{tH|Aij>1wH89i#Ly+ym%Fa;+x%VS_#+|mbdfXe(#(2zBfDT4`0{>kr~3smv$CKwEQ~8vFV3c4Ck|l3{|N4_Thq+l!FzUr! zuHV8sORI$BMX*l6`A;~d(wMRd)}R@(3Q0&eXq~Jjt0Rrn)e=BkYt!K#4|aFL=4;XA zBn`n?%&^l!inp%;(mUOye6QpC;pUCF@mm7}}GsZx!=%z#1LX^4Ddz-q2qh(TFEL~ zHez#Qh;ZMvV4nLuA+5kau)0F>7MGlT!UI$G_k>wBLmBj%(wsNjj&NI{S$#DyJ>FHH zoEC%=nBz&&5dj3ep6l|43joLz^X4h3Sse_A^P^;SSVrlW*==;7Mp@>SH;p-9c4r*W zpW}5ds(X8QL1WuP#O{m<9M5QlOR)hXRb$#r^j_!&*#RX+bI92%P3_xKJn* zirIXl=eVtGvHYOSD~qj$u$MT*OQIwi#Y(eSu-lbNyIm1{dD#|XPOrg#6+h?$Tnp=P G3H=3O9u`Xg literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=200/part-00000-22aeb477-f838-44b1-a897-4b0e7d009ae4.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=200/part-00000-22aeb477-f838-44b1-a897-4b0e7d009ae4.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..63e610a2c59922707c9e37f06bdfeab08f70ffe8 GIT binary patch literal 1168 zcmb7EL2DC16rN0CO>IGh&M+&BhQ)>yk~Nz&ZBuelYpGB|kb0>tWs*$dlFe?rJF$e4 zlU}?_(I4Qwdh+Z~5WITqMFj6&e3RWKk$|nR%Gtvz1gijcxn+u76>EnKfn4m zs}l_23`rt%;~qu`#bauP93{8Oo8KRPBqtzqnYe;r6k+ZVgPi?5KbxDt++2$=34nGy 
z$+cTpV+mCe-3Znw1pi78>sUhFBD7AEWE0Aerqe3fik?nmq^2GLw6`@M9CE*N5Hwzh zE++{{E@Fo5CMsc;@I6mn%>DL&==_LIgTL8QFnCAIy zlOJ;M)f`%EO1mp|!9I5Vv25Ibw(|%veG24=HgEKwRCmfN3?as*0563f*q+O-70qSS zLTqUY5ga)t%=D-yr0ILdW>-kwEFgG_pp^7EUuT)}NmkF7K*W zb`#PO%yy+{3m+10&vAI&0RU8rtZ_kVte*@9*-^1NFr)IT>^3@7qblRSHw`&peq#>k z&vx6F>E0h)(%Ak0vAa_ShcnvXQtZG;)tGi8zjIwb6%8n5nO4Cp=9Kew-)?k*Tt1&K zq%!rM?KD$`<+WvAE;Z}ITH%l{ilV3&%8f$aYL&~aR$1`XRZEB^tpeXxc%hH*lc>SJ F=nw0#8A<>E literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=300/part-00000-d9fc223e-4feb-4848-9d25-03962fc0e540.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=300/part-00000-d9fc223e-4feb-4848-9d25-03962fc0e540.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b7fb0541e9da1cb9de7c1c2b616a47465aa67120 GIT binary patch literal 1167 zcmb7E&ubGw6rN0CU9})WXPA{>!(u}U>6%T_A6t6yS5YV-NW9dRGD#+J>1H?GomfiA zMf?k-2%f~d7sX4#gMWYr&*IUue}iwb+f5{3D=f47zWv@e?|pA}>vx}61d%zy$cG>A z-lwtzLpVb;gs$Gk2%&T;4J%-j+$3M#p3YA&t`*d~G)6Jy7BR@H({G=(3C{Jkn3Dmj zAG%z>fpwNqV0&%6 zrkLKNn|W;`YA0GLDvX+oA}9~6gRMd2HzsrRSBjV@UrUqT9f2 zPhfhpQGtKSLL(#DEm{W-=qewhRaHBVCaZmuCX{|vy7YUB;=D$Grslxz9j5z1*W`T; zzM3P8P3iQ+HrU55Ka{OIPaF3U%TAG8(dDh9$F+vN!VqF?its`Nq2qh(QpK#A7GjH2 zjPSrUVWtNIAI-At2xTy6Nuy|V9O1S@WBu8&;qjh&<+K5g zX^tmFR|Eif1J~tE7YL9kipH6!v3@)p7L#OkV3PEU?3Ns=B+K~kO(PAM-&h0sbG+_( zb?*+(X>50h*!+~i(TpB&DHnI=0>E*n+RFT0$)98}MyK7y1bQgF5_*{s0av B7wG^1 literal 0 HcmV?d00001 diff --git a/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=400/part-00000-da80387a-f286-4848-9853-f82afd408710.c000.snappy.parquet b/plugin/trino-delta-lake/src/test/resources/databricks133/parsed_stats_struct/part=400/part-00000-da80387a-f286-4848-9853-f82afd408710.c000.snappy.parquet new file mode 100644 index 0000000000000000000000000000000000000000..eb85efaaad2646c8f7903947fb90a882e2722e5d GIT binary patch literal 1168 
zcmb7EO>fgc5M6IV8z2ZFWmjutL1fV=m0A<$qphP}fKnByiXv1lrB$^_Ho>Z6r}l=b zR5_8KfGV#101oAjgt&0w#ElC_&in%y$8H)Fi5AIPd&Y0xzIn5D{lQa{Ad(@By!-O~ z^$m?+2xmwNp=A zM3<8!B0b_-Q7OZuKC&vI9VEB&KA&lQNI0Taja(3?Y>8qiU!YU*k(1#PL7nG^^I z8iFaN59xX?wHmb(trX=)%|#K&1MgsKAo+z!hW=EDfpWDmQ+}l6T)~KSjc(*>mt-9# zfb|UySx*#=q$7cU(Ly65+D%#q4Dg0NMr)|{JepMdHbE%;9P-j{3dOk;{h?&Q?j5Fj ze#hWF4!#;Aiw$Xa#WvW-W*^Ju{byT`5YwhWj_B~_(UaPiwZsr&YzpvF_<`-Y>`KWf z8zy4&Q;6WeF<_<#eIX6sJ2bjN@)nnzeZa@MJn9L(st3~VH>JLyw{79H0=@eDSa*3h zd}X&F9m8x_ijMFh;r1PeHyi*!rC89APDRZmRY6x?QL%t+RqEWP(#e&(ktajTHe0kXvVqRT^Z!5acDf}er H@GtrUD90Jo literal 0 HcmV?d00001