diff --git a/api/src/main/java/org/apache/iceberg/DeleteFiles.java b/api/src/main/java/org/apache/iceberg/DeleteFiles.java index 74d31a6dad81..8a396920e03b 100644 --- a/api/src/main/java/org/apache/iceberg/DeleteFiles.java +++ b/api/src/main/java/org/apache/iceberg/DeleteFiles.java @@ -81,4 +81,15 @@ default DeleteFiles deleteFile(DataFile file) { * @return this for method chaining */ DeleteFiles caseSensitive(boolean caseSensitive); + + /** + * Enables validation that any files that are part of the deletion still exist when committing the + * operation. + * + * @return this for method chaining + */ + default DeleteFiles validateFilesExist() { + throw new UnsupportedOperationException( + this.getClass().getName() + " doesn't implement validateFilesExist"); + } } diff --git a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java index 152b62b443c5..495a30e5475b 100644 --- a/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java +++ b/core/src/main/java/org/apache/iceberg/BaseDistributedDataScan.java @@ -18,6 +18,7 @@ */ package org.apache.iceberg; +import static org.apache.iceberg.PlanningMode.AUTO; import static org.apache.iceberg.TableProperties.DATA_PLANNING_MODE; import static org.apache.iceberg.TableProperties.DELETE_PLANNING_MODE; import static org.apache.iceberg.TableProperties.PLANNING_MODE_DEFAULT; @@ -144,20 +145,19 @@ protected PlanningMode deletePlanningMode() { protected CloseableIterable doPlanFiles() { Snapshot snapshot = snapshot(); - List dataManifests = findMatchingDataManifests(snapshot); - boolean planDataLocally = shouldPlanLocally(dataPlanningMode(), dataManifests); - List deleteManifests = findMatchingDeleteManifests(snapshot); - boolean planDeletesLocally = shouldPlanLocally(deletePlanningMode(), deleteManifests); + boolean mayHaveEqualityDeletes = deleteManifests.size() > 0 && mayHaveEqualityDeletes(snapshot); + boolean planDeletesLocally = shouldPlanDeletesLocally(deleteManifests, mayHaveEqualityDeletes); + + List dataManifests = findMatchingDataManifests(snapshot); + boolean loadColumnStats = mayHaveEqualityDeletes || shouldReturnColumnStats(); + boolean planDataLocally = shouldPlanDataLocally(dataManifests, loadColumnStats); + boolean copyDataFiles = shouldCopyDataFiles(planDataLocally, loadColumnStats); if (planDataLocally && planDeletesLocally) { return planFileTasksLocally(dataManifests, deleteManifests); } - boolean mayHaveEqualityDeletes = deleteManifests.size() > 0 && mayHaveEqualityDeletes(snapshot); - boolean loadColumnStats = mayHaveEqualityDeletes || shouldReturnColumnStats(); - boolean copyDataFiles = shouldCopyDataFiles(planDataLocally, loadColumnStats); - ExecutorService monitorPool = newMonitorPool(); CompletableFuture deletesFuture = @@ -223,7 +223,18 @@ private List filterManifests(List manifests) { .collect(Collectors.toList()); } - protected boolean shouldPlanLocally(PlanningMode mode, List manifests) { + private boolean shouldPlanDeletesLocally( + List deleteManifests, boolean mayHaveEqualityDeletes) { + PlanningMode mode = deletePlanningMode(); + return (mode == AUTO && mayHaveEqualityDeletes) || shouldPlanLocally(mode, deleteManifests); + } + + private boolean shouldPlanDataLocally(List dataManifests, boolean loadColumnStats) { + PlanningMode mode = dataPlanningMode(); + return (mode == AUTO && loadColumnStats) || shouldPlanLocally(mode, dataManifests); + } + + private boolean shouldPlanLocally(PlanningMode mode, List manifests) { if 
(context().planWithCustomizedExecutor()) { return true; } diff --git a/core/src/main/java/org/apache/iceberg/BaseTable.java b/core/src/main/java/org/apache/iceberg/BaseTable.java index b9ed4f8d67ce..2093753bf755 100644 --- a/core/src/main/java/org/apache/iceberg/BaseTable.java +++ b/core/src/main/java/org/apache/iceberg/BaseTable.java @@ -53,6 +53,10 @@ public BaseTable(TableOperations ops, String name, MetricsReporter reporter) { this.reporter = reporter; } + MetricsReporter reporter() { + return reporter; + } + @Override public TableOperations operations() { return ops; diff --git a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java index 2cf16bca6c32..363aabbff24f 100644 --- a/core/src/main/java/org/apache/iceberg/MetadataUpdate.java +++ b/core/src/main/java/org/apache/iceberg/MetadataUpdate.java @@ -53,6 +53,11 @@ public String uuid() { public void applyTo(TableMetadata.Builder metadataBuilder) { metadataBuilder.assignUUID(uuid); } + + @Override + public void applyTo(ViewMetadata.Builder metadataBuilder) { + metadataBuilder.assignUUID(uuid); + } } class UpgradeFormatVersion implements MetadataUpdate { diff --git a/core/src/main/java/org/apache/iceberg/MetricsUtil.java b/core/src/main/java/org/apache/iceberg/MetricsUtil.java index 2cd001b5c46f..2d23121bb0f9 100644 --- a/core/src/main/java/org/apache/iceberg/MetricsUtil.java +++ b/core/src/main/java/org/apache/iceberg/MetricsUtil.java @@ -41,18 +41,36 @@ public class MetricsUtil { private MetricsUtil() {} /** - * Copies a metrics object without lower and upper bounds for given fields. + * Copies a metrics object without value, NULL and NaN counts for given fields. * - * @param excludedFieldIds field IDs for which the lower and upper bounds must be dropped + * @param excludedFieldIds field IDs for which the counts must be dropped + * @return a new metrics object without counts for given fields + */ + public static Metrics copyWithoutFieldCounts(Metrics metrics, Set excludedFieldIds) { + return new Metrics( + metrics.recordCount(), + metrics.columnSizes(), + copyWithoutKeys(metrics.valueCounts(), excludedFieldIds), + copyWithoutKeys(metrics.nullValueCounts(), excludedFieldIds), + copyWithoutKeys(metrics.nanValueCounts(), excludedFieldIds), + metrics.lowerBounds(), + metrics.upperBounds()); + } + + /** + * Copies a metrics object without counts and bounds for given fields. 
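+   * <p>For example, the position delete writer updated later in this patch keeps bounds only when
+   * a single data file is referenced; a simplified sketch of that choice:
+   *
+   * <pre>{@code
+   * Set<Integer> fileAndPosFieldIds =
+   *     ImmutableSet.of(DELETE_FILE_PATH.fieldId(), DELETE_FILE_POS.fieldId());
+   * Metrics metrics =
+   *     referencedDataFiles.size() > 1
+   *         ? MetricsUtil.copyWithoutFieldCountsAndBounds(appender.metrics(), fileAndPosFieldIds)
+   *         : MetricsUtil.copyWithoutFieldCounts(appender.metrics(), fileAndPosFieldIds);
+   * }</pre>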
+ * + * @param excludedFieldIds field IDs for which the counts and bounds must be dropped * @return a new metrics object without lower and upper bounds for given fields */ - public static Metrics copyWithoutFieldBounds(Metrics metrics, Set excludedFieldIds) { + public static Metrics copyWithoutFieldCountsAndBounds( + Metrics metrics, Set excludedFieldIds) { return new Metrics( metrics.recordCount(), metrics.columnSizes(), - metrics.valueCounts(), - metrics.nullValueCounts(), - metrics.nanValueCounts(), + copyWithoutKeys(metrics.valueCounts(), excludedFieldIds), + copyWithoutKeys(metrics.nullValueCounts(), excludedFieldIds), + copyWithoutKeys(metrics.nanValueCounts(), excludedFieldIds), copyWithoutKeys(metrics.lowerBounds(), excludedFieldIds), copyWithoutKeys(metrics.upperBounds(), excludedFieldIds)); } diff --git a/core/src/main/java/org/apache/iceberg/StreamingDelete.java b/core/src/main/java/org/apache/iceberg/StreamingDelete.java index 8ff7bb831ec9..df5a11bf31c5 100644 --- a/core/src/main/java/org/apache/iceberg/StreamingDelete.java +++ b/core/src/main/java/org/apache/iceberg/StreamingDelete.java @@ -28,6 +28,8 @@ * CommitFailedException}. */ public class StreamingDelete extends MergingSnapshotProducer implements DeleteFiles { + private boolean validateFilesToDeleteExist = false; + protected StreamingDelete(String tableName, TableOperations ops) { super(tableName, ops); } @@ -60,9 +62,22 @@ public StreamingDelete deleteFromRowFilter(Expression expr) { return this; } + @Override + public DeleteFiles validateFilesExist() { + this.validateFilesToDeleteExist = true; + return this; + } + @Override public StreamingDelete toBranch(String branch) { targetBranch(branch); return this; } + + @Override + protected void validate(TableMetadata base, Snapshot parent) { + if (validateFilesToDeleteExist) { + failMissingDeletePaths(); + } + } } diff --git a/core/src/main/java/org/apache/iceberg/TableMetadata.java b/core/src/main/java/org/apache/iceberg/TableMetadata.java index a6f1d428f41a..d1558e4a8602 100644 --- a/core/src/main/java/org/apache/iceberg/TableMetadata.java +++ b/core/src/main/java/org/apache/iceberg/TableMetadata.java @@ -68,7 +68,7 @@ public static TableMetadata newTableMetadata( PropertyUtil.propertyAsInt( properties, TableProperties.FORMAT_VERSION, DEFAULT_TABLE_FORMAT_VERSION); return newTableMetadata( - schema, spec, sortOrder, location, unreservedProperties(properties), formatVersion); + schema, spec, sortOrder, location, persistedProperties(properties), formatVersion); } public static TableMetadata newTableMetadata( @@ -78,7 +78,7 @@ public static TableMetadata newTableMetadata( PropertyUtil.propertyAsInt( properties, TableProperties.FORMAT_VERSION, DEFAULT_TABLE_FORMAT_VERSION); return newTableMetadata( - schema, spec, sortOrder, location, unreservedProperties(properties), formatVersion); + schema, spec, sortOrder, location, persistedProperties(properties), formatVersion); } private static Map unreservedProperties(Map rawProperties) { @@ -87,6 +87,21 @@ private static Map unreservedProperties(Map rawP .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); } + private static Map persistedProperties(Map rawProperties) { + Map persistedProperties = Maps.newHashMap(); + + // explicitly set defaults that apply only to new tables + persistedProperties.put( + TableProperties.PARQUET_COMPRESSION, + TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0); + + rawProperties.entrySet().stream() + .filter(entry -> !TableProperties.RESERVED_PROPERTIES.contains(entry.getKey())) + 
.forEach(entry -> persistedProperties.put(entry.getKey(), entry.getValue())); + + return persistedProperties; + } + static TableMetadata newTableMetadata( Schema schema, PartitionSpec spec, @@ -685,7 +700,7 @@ public TableMetadata buildReplacement( .setDefaultPartitionSpec(freshSpec) .setDefaultSortOrder(freshSortOrder) .setLocation(newLocation) - .setProperties(unreservedProperties(updatedProperties)) + .setProperties(persistedProperties(updatedProperties)) .build(); } diff --git a/core/src/main/java/org/apache/iceberg/TableProperties.java b/core/src/main/java/org/apache/iceberg/TableProperties.java index 03e1f3ce8897..af90303a8693 100644 --- a/core/src/main/java/org/apache/iceberg/TableProperties.java +++ b/core/src/main/java/org/apache/iceberg/TableProperties.java @@ -143,6 +143,7 @@ private TableProperties() {} public static final String PARQUET_COMPRESSION = "write.parquet.compression-codec"; public static final String DELETE_PARQUET_COMPRESSION = "write.delete.parquet.compression-codec"; public static final String PARQUET_COMPRESSION_DEFAULT = "gzip"; + public static final String PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0 = "zstd"; public static final String PARQUET_COMPRESSION_LEVEL = "write.parquet.compression-level"; public static final String DELETE_PARQUET_COMPRESSION_LEVEL = diff --git a/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java b/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java index 4f799b434993..c8193755f5ba 100644 --- a/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java +++ b/core/src/main/java/org/apache/iceberg/deletes/PositionDeleteWriter.java @@ -47,7 +47,7 @@ * records, consider using {@link SortingPositionOnlyDeleteWriter} instead. */ public class PositionDeleteWriter implements FileWriter, DeleteWriteResult> { - private static final Set SINGLE_REFERENCED_FILE_BOUNDS_ONLY = + private static final Set FILE_AND_POS_FIELD_IDS = ImmutableSet.of(DELETE_FILE_PATH.fieldId(), DELETE_FILE_POS.fieldId()); private final FileAppender appender; @@ -121,9 +121,9 @@ public DeleteWriteResult result() { private Metrics metrics() { Metrics metrics = appender.metrics(); if (referencedDataFiles.size() > 1) { - return MetricsUtil.copyWithoutFieldBounds(metrics, SINGLE_REFERENCED_FILE_BOUNDS_ONLY); + return MetricsUtil.copyWithoutFieldCountsAndBounds(metrics, FILE_AND_POS_FIELD_IDS); } else { - return metrics; + return MetricsUtil.copyWithoutFieldCounts(metrics, FILE_AND_POS_FIELD_IDS); } } } diff --git a/core/src/main/java/org/apache/iceberg/io/OutputFileFactory.java b/core/src/main/java/org/apache/iceberg/io/OutputFileFactory.java index 473272635df0..c46ca132ed0b 100644 --- a/core/src/main/java/org/apache/iceberg/io/OutputFileFactory.java +++ b/core/src/main/java/org/apache/iceberg/io/OutputFileFactory.java @@ -23,6 +23,7 @@ import java.util.UUID; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import org.apache.iceberg.FileFormat; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.StructLike; @@ -35,7 +36,7 @@ public class OutputFileFactory { private final PartitionSpec defaultSpec; private final FileFormat format; private final LocationProvider locations; - private final FileIO io; + private final Supplier ioSupplier; private final EncryptionManager encryptionManager; private final int partitionId; private final long taskId; @@ -56,7 +57,7 @@ public class OutputFileFactory { * @param spec Partition specification used by the location provider * @param format File 
format used for the extension * @param locations Location provider used for generating locations - * @param io FileIO to store the files + * @param ioSupplier Supplier of FileIO to store the files * @param encryptionManager Encryption manager used for encrypting the files * @param partitionId First part of the file name * @param taskId Second part of the file name @@ -67,7 +68,7 @@ private OutputFileFactory( PartitionSpec spec, FileFormat format, LocationProvider locations, - FileIO io, + Supplier ioSupplier, EncryptionManager encryptionManager, int partitionId, long taskId, @@ -76,7 +77,7 @@ private OutputFileFactory( this.defaultSpec = spec; this.format = format; this.locations = locations; - this.io = io; + this.ioSupplier = ioSupplier; this.encryptionManager = encryptionManager; this.partitionId = partitionId; this.taskId = taskId; @@ -101,7 +102,7 @@ private String generateFilename() { /** Generates an {@link EncryptedOutputFile} for unpartitioned writes. */ public EncryptedOutputFile newOutputFile() { - OutputFile file = io.newOutputFile(locations.newDataLocation(generateFilename())); + OutputFile file = ioSupplier.get().newOutputFile(locations.newDataLocation(generateFilename())); return encryptionManager.encrypt(file); } @@ -113,7 +114,7 @@ public EncryptedOutputFile newOutputFile(StructLike partition) { /** Generates an {@link EncryptedOutputFile} for partitioned writes in a given spec. */ public EncryptedOutputFile newOutputFile(PartitionSpec spec, StructLike partition) { String newDataLocation = locations.newDataLocation(spec, partition, generateFilename()); - OutputFile rawOutputFile = io.newOutputFile(newDataLocation); + OutputFile rawOutputFile = ioSupplier.get().newOutputFile(newDataLocation); return encryptionManager.encrypt(rawOutputFile); } @@ -125,6 +126,7 @@ public static class Builder { private String operationId; private FileFormat format; private String suffix; + private Supplier ioSupplier; private Builder(Table table, int partitionId, long taskId) { this.table = table; @@ -136,6 +138,7 @@ private Builder(Table table, int partitionId, long taskId) { String formatAsString = table.properties().getOrDefault(DEFAULT_FILE_FORMAT, DEFAULT_FILE_FORMAT_DEFAULT); this.format = FileFormat.fromString(formatAsString); + this.ioSupplier = table::io; } public Builder defaultSpec(PartitionSpec newDefaultSpec) { @@ -158,12 +161,31 @@ public Builder suffix(String newSuffix) { return this; } + /** + * Configures a {@link FileIO} supplier, which can potentially be used to dynamically refresh + * the file IO instance when a table is refreshed. 
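+   * <p>A rough usage sketch, assuming the existing {@code builderFor(table, partitionId, taskId)}
+   * entry point and any refreshable {@code tableSupplier}:
+   *
+   * <pre>{@code
+   * OutputFileFactory fileFactory =
+   *     OutputFileFactory.builderFor(table, partitionId, taskId)
+   *         .ioSupplier(() -> tableSupplier.get().io())
+   *         .build();
+   * }</pre>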
+ * + * @param newIoSupplier The file IO supplier + * @return this builder instance + */ + public Builder ioSupplier(Supplier newIoSupplier) { + this.ioSupplier = newIoSupplier; + return this; + } + public OutputFileFactory build() { LocationProvider locations = table.locationProvider(); - FileIO io = table.io(); EncryptionManager encryption = table.encryption(); return new OutputFileFactory( - defaultSpec, format, locations, io, encryption, partitionId, taskId, operationId, suffix); + defaultSpec, + format, + locations, + ioSupplier, + encryption, + partitionId, + taskId, + operationId, + suffix); } } } diff --git a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java index d4df7169fd1e..cb905bce09d6 100644 --- a/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java +++ b/core/src/main/java/org/apache/iceberg/view/ViewMetadata.java @@ -23,8 +23,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import java.util.stream.Collectors; import java.util.stream.Stream; +import javax.annotation.Nullable; import org.apache.iceberg.MetadataUpdate; import org.apache.iceberg.Schema; import org.apache.iceberg.exceptions.ValidationException; @@ -46,6 +48,8 @@ public interface ViewMetadata extends Serializable { int SUPPORTED_VIEW_FORMAT_VERSION = 1; int DEFAULT_VIEW_FORMAT_VERSION = 1; + String uuid(); + int formatVersion(); String location(); @@ -75,6 +79,9 @@ default Integer currentSchemaId() { List changes(); + @Nullable + String metadataFileLocation(); + default ViewVersion version(int versionId) { return versionsById().get(versionId); } @@ -141,6 +148,8 @@ class Builder { private int formatVersion = DEFAULT_VIEW_FORMAT_VERSION; private int currentVersionId; private String location; + private String uuid; + private String metadataLocation; // internal change tracking private Integer lastAddedVersionId = null; @@ -157,6 +166,7 @@ private Builder() { this.history = Lists.newArrayList(); this.properties = Maps.newHashMap(); this.changes = Lists.newArrayList(); + this.uuid = null; } private Builder(ViewMetadata base) { @@ -170,6 +180,8 @@ private Builder(ViewMetadata base) { this.formatVersion = base.formatVersion(); this.currentVersionId = base.currentVersionId(); this.location = base.location(); + this.uuid = base.uuid(); + this.metadataLocation = null; } public Builder upgradeFormatVersion(int newFormatVersion) { @@ -199,6 +211,11 @@ public Builder setLocation(String newLocation) { return this; } + public Builder setMetadataLocation(String newMetadataLocation) { + this.metadataLocation = newMetadataLocation; + return this; + } + public Builder setCurrentVersionId(int newVersionId) { if (newVersionId == LAST_ADDED) { ValidationException.check( @@ -353,10 +370,29 @@ public Builder removeProperties(Set propertiesToRemove) { return this; } + public ViewMetadata.Builder assignUUID(String newUUID) { + Preconditions.checkArgument(newUUID != null, "Cannot set uuid to null"); + Preconditions.checkArgument(uuid == null || newUUID.equals(uuid), "Cannot reassign uuid"); + + if (!newUUID.equals(uuid)) { + this.uuid = newUUID; + changes.add(new MetadataUpdate.AssignUUID(uuid)); + } + + return this; + } + public ViewMetadata build() { Preconditions.checkArgument(null != location, "Invalid location: null"); Preconditions.checkArgument(versions.size() > 0, "Invalid view: no versions were added"); + // when associated with a metadata file, metadata must have no changes so that the metadata + // 
matches exactly what is in the metadata file, which does not store changes. metadata + // location with changes is inconsistent. + Preconditions.checkArgument( + metadataLocation == null || changes.isEmpty(), + "Cannot create view metadata with a metadata location and changes"); + int historySize = PropertyUtil.propertyAsInt( properties, @@ -386,6 +422,7 @@ public ViewMetadata build() { } return ImmutableViewMetadata.of( + null == uuid ? UUID.randomUUID().toString() : uuid, formatVersion, location, schemas, @@ -393,7 +430,8 @@ public ViewMetadata build() { retainedVersions, retainedHistory, properties, - changes); + changes, + metadataLocation); } static List expireVersions( diff --git a/core/src/main/java/org/apache/iceberg/view/ViewMetadataParser.java b/core/src/main/java/org/apache/iceberg/view/ViewMetadataParser.java index c994c82ea875..7a29c87bad9c 100644 --- a/core/src/main/java/org/apache/iceberg/view/ViewMetadataParser.java +++ b/core/src/main/java/org/apache/iceberg/view/ViewMetadataParser.java @@ -39,6 +39,7 @@ public class ViewMetadataParser { + static final String VIEW_UUID = "view-uuid"; static final String FORMAT_VERSION = "format-version"; static final String LOCATION = "location"; static final String CURRENT_VERSION_ID = "current-version-id"; @@ -62,6 +63,7 @@ static void toJson(ViewMetadata metadata, JsonGenerator gen) throws IOException gen.writeStartObject(); + gen.writeStringField(VIEW_UUID, metadata.uuid()); gen.writeNumberField(FORMAT_VERSION, metadata.formatVersion()); gen.writeStringField(LOCATION, metadata.location()); JsonUtil.writeStringMap(PROPERTIES, metadata.properties(), gen); @@ -88,16 +90,25 @@ static void toJson(ViewMetadata metadata, JsonGenerator gen) throws IOException gen.writeEndObject(); } + public static ViewMetadata fromJson(String metadataLocation, String json) { + return JsonUtil.parse(json, node -> ViewMetadataParser.fromJson(metadataLocation, node)); + } + public static ViewMetadata fromJson(String json) { Preconditions.checkArgument(json != null, "Cannot parse view metadata from null string"); return JsonUtil.parse(json, ViewMetadataParser::fromJson); } public static ViewMetadata fromJson(JsonNode json) { + return fromJson(null, json); + } + + public static ViewMetadata fromJson(String metadataLocation, JsonNode json) { Preconditions.checkArgument(json != null, "Cannot parse view metadata from null object"); Preconditions.checkArgument( json.isObject(), "Cannot parse view metadata from non-object: %s", json); + String uuid = JsonUtil.getString(VIEW_UUID, json); int formatVersion = JsonUtil.getInt(FORMAT_VERSION, json); String location = JsonUtil.getString(LOCATION, json); Map properties = JsonUtil.getStringMap(PROPERTIES, json); @@ -131,6 +142,7 @@ public static ViewMetadata fromJson(JsonNode json) { } return ImmutableViewMetadata.of( + uuid, formatVersion, location, schemas, @@ -138,7 +150,8 @@ public static ViewMetadata fromJson(JsonNode json) { versions, historyEntries, properties, - ImmutableList.of()); + ImmutableList.of(), + metadataLocation); } public static void overwrite(ViewMetadata metadata, OutputFile outputFile) { @@ -151,7 +164,7 @@ public static void write(ViewMetadata metadata, OutputFile outputFile) { public static ViewMetadata read(InputFile file) { try (InputStream is = file.newStream()) { - return fromJson(JsonUtil.mapper().readValue(is, JsonNode.class)); + return fromJson(file.location(), JsonUtil.mapper().readValue(is, JsonNode.class)); } catch (IOException e) { throw new UncheckedIOException(String.format("Failed to read 
json file: %s", file), e); } diff --git a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java b/core/src/test/java/org/apache/iceberg/ScanPlanningAndReportingTestBase.java similarity index 93% rename from core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java rename to core/src/test/java/org/apache/iceberg/ScanPlanningAndReportingTestBase.java index 106c236f59b1..a8f98f82cc81 100644 --- a/core/src/test/java/org/apache/iceberg/TestScanPlanningAndReporting.java +++ b/core/src/test/java/org/apache/iceberg/ScanPlanningAndReportingTestBase.java @@ -36,14 +36,18 @@ import org.assertj.core.api.InstanceOfAssertFactories; import org.junit.Test; -public class TestScanPlanningAndReporting extends TableTestBase { +public abstract class ScanPlanningAndReportingTestBase< + ScanT extends Scan, T extends ScanTask, G extends ScanTaskGroup> + extends TableTestBase { private final TestMetricsReporter reporter = new TestMetricsReporter(); - public TestScanPlanningAndReporting() { + public ScanPlanningAndReportingTestBase() { super(2); } + protected abstract ScanT newScan(Table table); + @Test public void noDuplicatesInScanContext() { TableScanContext context = TableScanContext.empty(); @@ -82,12 +86,11 @@ public void scanningWithMultipleReporters() throws IOException { table.refresh(); AtomicInteger reportedCount = new AtomicInteger(); - TableScan tableScan = - table - .newScan() + ScanT tableScan = + newScan(table) .metricsReporter((MetricsReporter) -> reportedCount.getAndIncrement()) .metricsReporter((MetricsReporter) -> reportedCount.getAndIncrement()); - try (CloseableIterable fileScanTasks = tableScan.planFiles()) { + try (CloseableIterable fileScanTasks = tableScan.planFiles()) { fileScanTasks.forEach(task -> {}); } @@ -113,10 +116,10 @@ public void scanningWithMultipleDataManifests() throws IOException { table.newAppend().appendFile(FILE_A).appendFile(FILE_B).commit(); table.newAppend().appendFile(FILE_D).commit(); table.refresh(); - TableScan tableScan = table.newScan(); + ScanT tableScan = newScan(table); // should be 3 files - try (CloseableIterable fileScanTasks = tableScan.planFiles()) { + try (CloseableIterable fileScanTasks = tableScan.planFiles()) { fileScanTasks.forEach(task -> {}); } @@ -180,9 +183,9 @@ public void scanningWithDeletes() throws IOException { table.newAppend().appendFile(FILE_A).appendFile(FILE_B).appendFile(FILE_C).commit(); table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_B_DELETES).commit(); - TableScan tableScan = table.newScan(); + ScanT tableScan = newScan(table); - try (CloseableIterable fileScanTasks = tableScan.planFiles()) { + try (CloseableIterable fileScanTasks = tableScan.planFiles()) { fileScanTasks.forEach(task -> {}); } @@ -218,12 +221,12 @@ public void scanningWithSkippedDataFiles() throws IOException { table.newAppend().appendFile(FILE_A).appendFile(FILE_B).appendFile(FILE_D).commit(); table.newOverwrite().deleteFile(FILE_A).addFile(FILE_A2).commit(); table.newAppend().appendFile(FILE_C).commit(); - TableScan tableScan = table.newScan(); + ScanT tableScan = newScan(table); List fileTasks = Lists.newArrayList(); - try (CloseableIterable fileScanTasks = + try (CloseableIterable scanTasks = tableScan.filter(Expressions.equal("data", "1")).planFiles()) { - fileScanTasks.forEach(fileTasks::add); + scanTasks.forEach(task -> fileTasks.add((FileScanTask) task)); } assertThat(fileTasks) .singleElement() @@ -259,12 +262,12 @@ public void scanningWithSkippedDeleteFiles() throws IOException { 
table.newOverwrite().deleteFile(FILE_A).addFile(FILE_A2).commit(); table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_D2_DELETES).commit(); table.newRowDelta().addDeletes(FILE_B_DELETES).addDeletes(FILE_C2_DELETES).commit(); - TableScan tableScan = table.newScan(); + ScanT tableScan = newScan(table); List fileTasks = Lists.newArrayList(); - try (CloseableIterable fileScanTasks = + try (CloseableIterable scanTasks = tableScan.filter(Expressions.equal("data", "1")).planFiles()) { - fileScanTasks.forEach(fileTasks::add); + scanTasks.forEach(task -> fileTasks.add((FileScanTask) task)); } assertThat(fileTasks) .singleElement() @@ -302,9 +305,9 @@ public void scanningWithEqualityAndPositionalDeleteFiles() throws IOException { table.newAppend().appendFile(FILE_A).commit(); // FILE_A_DELETES = positionalDelete / FILE_A2_DELETES = equalityDelete table.newRowDelta().addDeletes(FILE_A_DELETES).addDeletes(FILE_A2_DELETES).commit(); - TableScan tableScan = table.newScan(); + ScanT tableScan = newScan(table); - try (CloseableIterable fileScanTasks = + try (CloseableIterable fileScanTasks = tableScan.filter(Expressions.equal("data", "6")).planFiles()) { fileScanTasks.forEach(task -> {}); } diff --git a/core/src/test/java/org/apache/iceberg/TestCommitReporting.java b/core/src/test/java/org/apache/iceberg/TestCommitReporting.java index 9998c47ff300..08c4ac33d6fd 100644 --- a/core/src/test/java/org/apache/iceberg/TestCommitReporting.java +++ b/core/src/test/java/org/apache/iceberg/TestCommitReporting.java @@ -21,7 +21,7 @@ import static org.assertj.core.api.Assertions.assertThat; import java.io.IOException; -import org.apache.iceberg.TestScanPlanningAndReporting.TestMetricsReporter; +import org.apache.iceberg.ScanPlanningAndReportingTestBase.TestMetricsReporter; import org.apache.iceberg.metrics.CommitMetricsResult; import org.apache.iceberg.metrics.CommitReport; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; diff --git a/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java b/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java index 4e4565306c00..63fc7010c49c 100644 --- a/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java +++ b/core/src/test/java/org/apache/iceberg/TestDeleteFiles.java @@ -412,6 +412,37 @@ public void testDeleteWithCollision() { afterDeletePartitions); } + @Test + public void testDeleteValidateFileExistence() { + commit(table, table.newFastAppend().appendFile(FILE_B), branch); + Snapshot delete = + commit(table, table.newDelete().deleteFile(FILE_B).validateFilesExist(), branch); + validateManifestEntries( + Iterables.getOnlyElement(delete.allManifests(FILE_IO)), + ids(delete.snapshotId()), + files(FILE_B), + statuses(Status.DELETED)); + + Assertions.assertThatThrownBy( + () -> commit(table, table.newDelete().deleteFile(FILE_B).validateFilesExist(), branch)) + .isInstanceOf(ValidationException.class); + } + + @Test + public void testDeleteFilesNoValidation() { + commit(table, table.newFastAppend().appendFile(FILE_B), branch); + Snapshot delete1 = commit(table, table.newDelete().deleteFile(FILE_B), branch); + validateManifestEntries( + Iterables.getOnlyElement(delete1.allManifests(FILE_IO)), + ids(delete1.snapshotId()), + files(FILE_B), + statuses(Status.DELETED)); + + Snapshot delete2 = commit(table, table.newDelete().deleteFile(FILE_B), branch); + Assertions.assertThat(delete2.allManifests(FILE_IO).isEmpty()).isTrue(); + Assertions.assertThat(delete2.removedDataFiles(FILE_IO).iterator().hasNext()).isFalse(); + } + private static 
ByteBuffer longToBuffer(long value) { return ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN).putLong(0, value); } diff --git a/core/src/test/java/org/apache/iceberg/TestLocalScanPlanningAndReporting.java b/core/src/test/java/org/apache/iceberg/TestLocalScanPlanningAndReporting.java new file mode 100644 index 000000000000..dd8f5374f089 --- /dev/null +++ b/core/src/test/java/org/apache/iceberg/TestLocalScanPlanningAndReporting.java @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +public class TestLocalScanPlanningAndReporting + extends ScanPlanningAndReportingTestBase { + + @Override + protected TableScan newScan(Table table) { + return table.newScan(); + } +} diff --git a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java index 9a75beb59d2a..7ff21e4c389b 100644 --- a/core/src/test/java/org/apache/iceberg/TestTableMetadata.java +++ b/core/src/test/java/org/apache/iceberg/TestTableMetadata.java @@ -29,6 +29,7 @@ import static org.apache.iceberg.TableMetadataParser.SCHEMA; import static org.apache.iceberg.TableMetadataParser.SNAPSHOTS; import static org.apache.iceberg.TestHelpers.assertSameSchemaList; +import static org.assertj.core.api.Assertions.assertThat; import com.fasterxml.jackson.core.JsonGenerator; import java.io.File; @@ -1457,14 +1458,10 @@ public void testCreateV2MetadataThroughTableProperty() { null, ImmutableMap.of(TableProperties.FORMAT_VERSION, "2", "key", "val")); - Assert.assertEquals( - "format version should be configured based on the format-version key", - 2, - meta.formatVersion()); - Assert.assertEquals( - "should not contain format-version in properties", - ImmutableMap.of("key", "val"), - meta.properties()); + assertThat(meta.formatVersion()).isEqualTo(2); + assertThat(meta.properties()) + .containsEntry("key", "val") + .doesNotContainKey(TableProperties.FORMAT_VERSION); } @Test @@ -1486,14 +1483,11 @@ public void testReplaceV1MetadataToV2ThroughTableProperty() { meta.location(), ImmutableMap.of(TableProperties.FORMAT_VERSION, "2", "key2", "val2")); - Assert.assertEquals( - "format version should be configured based on the format-version key", - 2, - meta.formatVersion()); - Assert.assertEquals( - "should not contain format-version but should contain old and new properties", - ImmutableMap.of("key", "val", "key2", "val2"), - meta.properties()); + assertThat(meta.formatVersion()).isEqualTo(2); + assertThat(meta.properties()) + .containsEntry("key", "val") + .containsEntry("key2", "val2") + .doesNotContainKey(TableProperties.FORMAT_VERSION); } @Test diff --git a/core/src/test/java/org/apache/iceberg/view/TestViewMetadata.java 
b/core/src/test/java/org/apache/iceberg/view/TestViewMetadata.java index a852a716d53f..b525068cdff8 100644 --- a/core/src/test/java/org/apache/iceberg/view/TestViewMetadata.java +++ b/core/src/test/java/org/apache/iceberg/view/TestViewMetadata.java @@ -24,6 +24,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import java.util.UUID; import org.apache.iceberg.MetadataUpdate; import org.apache.iceberg.Schema; import org.apache.iceberg.catalog.Namespace; @@ -43,6 +44,8 @@ private ViewVersion newViewVersion(int id, String sql) { .defaultCatalog("prod") .defaultNamespace(Namespace.of("default")) .summary(ImmutableMap.of("operation", "create")) + .addRepresentations( + ImmutableSQLViewRepresentation.builder().dialect("spark").sql(sql).build()) .schemaId(1) .build(); } @@ -101,6 +104,10 @@ public void nullAndMissingFields() { () -> ViewMetadata.builder().setLocation("location").setCurrentVersionId(1).build()) .isInstanceOf(IllegalArgumentException.class) .hasMessage("Cannot set current version to unknown version: 1"); + + assertThatThrownBy(() -> ViewMetadata.builder().assignUUID(null).build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot set uuid to null"); } @Test @@ -380,8 +387,10 @@ public void viewMetadataAndMetadataChanges() { .defaultNamespace(Namespace.of("ns")) .build(); + String uuid = "fa6506c3-7681-40c8-86dc-e36561f83385"; ViewMetadata viewMetadata = ViewMetadata.builder() + .assignUUID(uuid) .setLocation("custom-location") .setProperties(properties) .addSchema(schemaOne) @@ -406,23 +415,30 @@ public void viewMetadataAndMetadataChanges() { assertThat(viewMetadata.properties()).isEqualTo(properties); List changes = viewMetadata.changes(); - assertThat(changes).hasSize(8); + assertThat(changes).hasSize(9); assertThat(changes) .element(0) + .isInstanceOf(MetadataUpdate.AssignUUID.class) + .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.AssignUUID.class)) + .extracting(MetadataUpdate.AssignUUID::uuid) + .isEqualTo(uuid); + + assertThat(changes) + .element(1) .isInstanceOf(MetadataUpdate.SetLocation.class) .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.SetLocation.class)) .extracting(MetadataUpdate.SetLocation::location) .isEqualTo("custom-location"); assertThat(changes) - .element(1) + .element(2) .isInstanceOf(MetadataUpdate.SetProperties.class) .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.SetProperties.class)) .extracting(MetadataUpdate.SetProperties::updated) .isEqualTo(properties); assertThat(changes) - .element(2) + .element(3) .isInstanceOf(MetadataUpdate.AddSchema.class) .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.AddSchema.class)) .extracting(MetadataUpdate.AddSchema::schema) @@ -430,7 +446,7 @@ public void viewMetadataAndMetadataChanges() { .isEqualTo(1); assertThat(changes) - .element(3) + .element(4) .isInstanceOf(MetadataUpdate.AddSchema.class) .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.AddSchema.class)) .extracting(MetadataUpdate.AddSchema::schema) @@ -438,31 +454,110 @@ public void viewMetadataAndMetadataChanges() { .isEqualTo(2); assertThat(changes) - .element(4) + .element(5) .isInstanceOf(MetadataUpdate.AddViewVersion.class) .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.AddViewVersion.class)) .extracting(MetadataUpdate.AddViewVersion::viewVersion) .isEqualTo(viewVersionOne); assertThat(changes) - .element(5) + .element(6) .isInstanceOf(MetadataUpdate.AddViewVersion.class) 
.asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.AddViewVersion.class)) .extracting(MetadataUpdate.AddViewVersion::viewVersion) .isEqualTo(viewVersionTwo); assertThat(changes) - .element(6) + .element(7) .isInstanceOf(MetadataUpdate.AddViewVersion.class) .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.AddViewVersion.class)) .extracting(MetadataUpdate.AddViewVersion::viewVersion) .isEqualTo(viewVersionThree); assertThat(changes) - .element(7) + .element(8) .isInstanceOf(MetadataUpdate.SetCurrentViewVersion.class) .asInstanceOf(InstanceOfAssertFactories.type(MetadataUpdate.SetCurrentViewVersion.class)) .extracting(MetadataUpdate.SetCurrentViewVersion::versionId) .isEqualTo(-1); } + + @Test + public void uuidAssignment() { + String uuid = "fa6506c3-7681-40c8-86dc-e36561f83385"; + ViewMetadata viewMetadata = + ViewMetadata.builder() + .assignUUID(uuid) + .setLocation("custom-location") + .addSchema(new Schema(1, Types.NestedField.required(1, "x", Types.LongType.get()))) + .addVersion( + ImmutableViewVersion.builder() + .schemaId(1) + .versionId(1) + .timestampMillis(23L) + .putSummary("operation", "create") + .defaultNamespace(Namespace.of("ns")) + .build()) + .setCurrentVersionId(1) + .build(); + + assertThat(viewMetadata.uuid()).isEqualTo(uuid); + + // uuid should be carried over + ViewMetadata updated = ViewMetadata.buildFrom(viewMetadata).build(); + assertThat(updated.uuid()).isEqualTo(uuid); + assertThat(updated.changes()).isEmpty(); + + // assigning the same uuid shouldn't fail and shouldn't cause any changes + updated = ViewMetadata.buildFrom(viewMetadata).assignUUID(uuid).build(); + assertThat(updated.uuid()).isEqualTo(uuid); + assertThat(updated.changes()).isEmpty(); + + // can't reassign view uuid + assertThatThrownBy( + () -> + ViewMetadata.buildFrom(viewMetadata) + .assignUUID(UUID.randomUUID().toString()) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot reassign uuid"); + } + + @Test + public void viewMetadataWithMetadataLocation() { + Schema schema = new Schema(1, Types.NestedField.required(1, "x", Types.LongType.get())); + ViewVersion viewVersion = + ImmutableViewVersion.builder() + .schemaId(schema.schemaId()) + .versionId(1) + .timestampMillis(23L) + .putSummary("operation", "a") + .defaultNamespace(Namespace.of("ns")) + .build(); + + assertThatThrownBy( + () -> + ViewMetadata.builder() + .setLocation("custom-location") + .setMetadataLocation("metadata-location") + .addSchema(schema) + .addVersion(viewVersion) + .setCurrentVersionId(1) + .build()) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("Cannot create view metadata with a metadata location and changes"); + + // setting metadata location without changes is ok + ViewMetadata viewMetadata = + ViewMetadata.buildFrom( + ViewMetadata.builder() + .setLocation("custom-location") + .addSchema(schema) + .addVersion(viewVersion) + .setCurrentVersionId(1) + .build()) + .setMetadataLocation("metadata-location") + .build(); + assertThat(viewMetadata.metadataFileLocation()).isEqualTo("metadata-location"); + } } diff --git a/core/src/test/java/org/apache/iceberg/view/TestViewMetadataParser.java b/core/src/test/java/org/apache/iceberg/view/TestViewMetadataParser.java index 807a1df9b1d1..076626d9faad 100644 --- a/core/src/test/java/org/apache/iceberg/view/TestViewMetadataParser.java +++ b/core/src/test/java/org/apache/iceberg/view/TestViewMetadataParser.java @@ -89,6 +89,7 @@ public void readAndWriteValidViewMetadata() throws Exception { String json = 
readViewMetadataInputFile("org/apache/iceberg/view/ValidViewMetadata.json"); ViewMetadata expectedViewMetadata = ViewMetadata.builder() + .assignUUID("fa6506c3-7681-40c8-86dc-e36561f83385") .addSchema(TEST_SCHEMA) .addVersion(version1) .addVersion(version2) @@ -161,4 +162,69 @@ private String readViewMetadataInputFile(String fileName) throws Exception { Path path = Paths.get(getClass().getClassLoader().getResource(fileName).toURI()); return String.join("", java.nio.file.Files.readAllLines(path)); } + + @Test + public void viewMetadataWithMetadataLocation() throws Exception { + ViewVersion version1 = + ImmutableViewVersion.builder() + .versionId(1) + .timestampMillis(4353L) + .summary(ImmutableMap.of("operation", "create")) + .schemaId(1) + .defaultCatalog("some-catalog") + .defaultNamespace(Namespace.empty()) + .addRepresentations( + ImmutableSQLViewRepresentation.builder() + .sql("select 'foo' foo") + .dialect("spark-sql") + .build()) + .build(); + + ViewVersion version2 = + ImmutableViewVersion.builder() + .versionId(2) + .schemaId(1) + .timestampMillis(5555L) + .summary(ImmutableMap.of("operation", "replace")) + .defaultCatalog("some-catalog") + .defaultNamespace(Namespace.empty()) + .addRepresentations( + ImmutableSQLViewRepresentation.builder() + .sql("select 1 id, 'abc' data") + .dialect("spark-sql") + .build()) + .build(); + + String json = readViewMetadataInputFile("org/apache/iceberg/view/ValidViewMetadata.json"); + String metadataLocation = "s3://bucket/test/location/metadata/v1.metadata.json"; + ViewMetadata expectedViewMetadata = + ViewMetadata.buildFrom( + ViewMetadata.builder() + .assignUUID("fa6506c3-7681-40c8-86dc-e36561f83385") + .addSchema(TEST_SCHEMA) + .addVersion(version1) + .addVersion(version2) + .setLocation("s3://bucket/test/location") + .setProperties(ImmutableMap.of("some-key", "some-value")) + .setCurrentVersionId(2) + .upgradeFormatVersion(1) + .build()) + .setMetadataLocation(metadataLocation) + .build(); + + ViewMetadata actual = ViewMetadataParser.fromJson(metadataLocation, json); + assertThat(actual) + .usingRecursiveComparison() + .ignoringFieldsOfTypes(Schema.class) + .isEqualTo(expectedViewMetadata); + + actual = + ViewMetadataParser.fromJson( + metadataLocation, ViewMetadataParser.toJson(expectedViewMetadata)); + assertThat(actual) + .usingRecursiveComparison() + .ignoringFieldsOfTypes(Schema.class) + .isEqualTo(expectedViewMetadata); + assertThat(actual.metadataFileLocation()).isEqualTo(metadataLocation); + } } diff --git a/core/src/test/resources/org/apache/iceberg/view/ValidViewMetadata.json b/core/src/test/resources/org/apache/iceberg/view/ValidViewMetadata.json index 9c0ae0ecbef6..4e29ed8702a0 100644 --- a/core/src/test/resources/org/apache/iceberg/view/ValidViewMetadata.json +++ b/core/src/test/resources/org/apache/iceberg/view/ValidViewMetadata.json @@ -1,4 +1,5 @@ { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version": 1, "location": "s3://bucket/test/location", "properties": {"some-key": "some-value"}, diff --git a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentSchema.json b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentSchema.json index b63c6a628518..e6bdff2aadbf 100644 --- a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentSchema.json +++ b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentSchema.json @@ -1,4 +1,5 @@ { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version": 1, "location": 
"s3://bucket/test/location", "properties": {"some-key": "some-value"}, diff --git a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentVersion.json b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentVersion.json index fbcb2c9a4176..8db0359c0d11 100644 --- a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentVersion.json +++ b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataInvalidCurrentVersion.json @@ -1,4 +1,5 @@ { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version": 1, "location": "s3://bucket/test/location", "properties": {"some-key": "some-value"}, diff --git a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingCurrentVersion.json b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingCurrentVersion.json index f09a7a4aa6b5..07febf71c9da 100644 --- a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingCurrentVersion.json +++ b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingCurrentVersion.json @@ -1,4 +1,5 @@ { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version": 1, "location": "s3://bucket/test/location", "properties": {"some-key": "some-value"}, diff --git a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingLocation.json b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingLocation.json index d0fa7d9392a9..aa6d56ead3d4 100644 --- a/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingLocation.json +++ b/core/src/test/resources/org/apache/iceberg/view/ViewMetadataMissingLocation.json @@ -1,4 +1,5 @@ { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version": 1, "properties": {"some-key": "some-value"}, "current-schema-id": 1, diff --git a/data/src/test/java/org/apache/iceberg/io/TestFileWriterFactory.java b/data/src/test/java/org/apache/iceberg/io/TestFileWriterFactory.java index 7910c666b45d..e25a179edbfc 100644 --- a/data/src/test/java/org/apache/iceberg/io/TestFileWriterFactory.java +++ b/data/src/test/java/org/apache/iceberg/io/TestFileWriterFactory.java @@ -46,6 +46,7 @@ import org.apache.iceberg.parquet.Parquet; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.types.Types; import org.apache.iceberg.util.CharSequenceSet; import org.apache.iceberg.util.Pair; import org.apache.iceberg.util.StructLikeSet; @@ -232,14 +233,20 @@ public void testPositionDeleteWriter() throws IOException { if (fileFormat == FileFormat.AVRO) { Assert.assertNull(deleteFile.lowerBounds()); Assert.assertNull(deleteFile.upperBounds()); + Assert.assertNull(deleteFile.columnSizes()); } else { Assert.assertEquals(1, referencedDataFiles.size()); Assert.assertEquals(2, deleteFile.lowerBounds().size()); Assert.assertTrue(deleteFile.lowerBounds().containsKey(DELETE_FILE_PATH.fieldId())); Assert.assertEquals(2, deleteFile.upperBounds().size()); Assert.assertTrue(deleteFile.upperBounds().containsKey(DELETE_FILE_PATH.fieldId())); + Assert.assertEquals(2, deleteFile.columnSizes().size()); } + Assert.assertNull(deleteFile.valueCounts()); + Assert.assertNull(deleteFile.nullValueCounts()); + Assert.assertNull(deleteFile.nanValueCounts()); + // verify the written delete file GenericRecord deleteRecord = GenericRecord.create(DeleteSchemaUtil.pathPosSchema()); List expectedDeletes = @@ -281,6 +288,34 @@ public void testPositionDeleteWriterWithRow() 
throws IOException { DeleteFile deleteFile = result.first(); CharSequenceSet referencedDataFiles = result.second(); + if (fileFormat == FileFormat.AVRO) { + Assert.assertNull(deleteFile.lowerBounds()); + Assert.assertNull(deleteFile.upperBounds()); + Assert.assertNull(deleteFile.columnSizes()); + Assert.assertNull(deleteFile.valueCounts()); + Assert.assertNull(deleteFile.nullValueCounts()); + Assert.assertNull(deleteFile.nanValueCounts()); + } else { + Assert.assertEquals(1, referencedDataFiles.size()); + Assert.assertEquals(4, deleteFile.lowerBounds().size()); + Assert.assertTrue(deleteFile.lowerBounds().containsKey(DELETE_FILE_PATH.fieldId())); + Assert.assertTrue(deleteFile.lowerBounds().containsKey(DELETE_FILE_POS.fieldId())); + for (Types.NestedField column : table.schema().columns()) { + Assert.assertTrue(deleteFile.lowerBounds().containsKey(column.fieldId())); + } + Assert.assertEquals(4, deleteFile.upperBounds().size()); + Assert.assertTrue(deleteFile.upperBounds().containsKey(DELETE_FILE_PATH.fieldId())); + Assert.assertTrue(deleteFile.upperBounds().containsKey(DELETE_FILE_POS.fieldId())); + for (Types.NestedField column : table.schema().columns()) { + Assert.assertTrue(deleteFile.upperBounds().containsKey(column.fieldId())); + } + // ORC also contains metrics for the deleted row struct, not just actual data fields + Assert.assertTrue(deleteFile.columnSizes().size() >= 4); + Assert.assertTrue(deleteFile.valueCounts().size() >= 2); + Assert.assertTrue(deleteFile.nullValueCounts().size() >= 2); + Assert.assertNull(deleteFile.nanValueCounts()); + } + // verify the written delete file GenericRecord deletedRow = GenericRecord.create(table.schema()); Schema positionDeleteSchema = DeleteSchemaUtil.posDeleteSchema(table.schema()); @@ -336,6 +371,15 @@ public void testPositionDeleteWriterMultipleDataFiles() throws IOException { Assert.assertEquals(2, referencedDataFiles.size()); Assert.assertNull(deleteFile.lowerBounds()); Assert.assertNull(deleteFile.upperBounds()); + Assert.assertNull(deleteFile.valueCounts()); + Assert.assertNull(deleteFile.nullValueCounts()); + Assert.assertNull(deleteFile.nanValueCounts()); + + if (fileFormat == FileFormat.AVRO) { + Assert.assertNull(deleteFile.columnSizes()); + } else { + Assert.assertEquals(2, deleteFile.columnSizes().size()); + } // commit the data and delete files table diff --git a/flink/v1.15/build.gradle b/flink/v1.15/build.gradle index febc678c2bec..a77ec1b50ab3 100644 --- a/flink/v1.15/build.gradle +++ b/flink/v1.15/build.gradle @@ -114,6 +114,11 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { } testImplementation libs.awaitility + testImplementation libs.assertj.core + } + + test { + useJUnitPlatform() } } diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java index 65717089d0d8..7167859e600c 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java @@ -18,11 +18,13 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.function.Function; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.util.TimeUtils; import org.apache.iceberg.Table; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import 
org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -59,6 +61,10 @@ public StringConfParser stringConf() { return new StringConfParser(); } + public DurationConfParser durationConf() { + return new DurationConfParser(); + } + class BooleanConfParser extends ConfParser { private Boolean defaultValue; @@ -180,6 +186,29 @@ public E parseOptional() { } } + class DurationConfParser extends ConfParser { + private Duration defaultValue; + + @Override + protected DurationConfParser self() { + return this; + } + + public DurationConfParser defaultValue(Duration value) { + this.defaultValue = value; + return self(); + } + + public Duration parse() { + Preconditions.checkArgument(defaultValue != null, "Default value cannot be null"); + return parse(TimeUtils::parseDuration, defaultValue); + } + + public Duration parseOptional() { + return parse(TimeUtils::parseDuration, null); + } + } + abstract class ConfParser { private final List optionNames = Lists.newArrayList(); private String tablePropertyName; diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java index aba23389f2fe..ca7b1120bc81 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java @@ -18,7 +18,9 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; import java.util.Map; +import org.apache.flink.annotation.Experimental; import org.apache.flink.configuration.ReadableConfig; import org.apache.iceberg.DistributionMode; import org.apache.iceberg.FileFormat; @@ -184,4 +186,20 @@ public String branch() { public Integer writeParallelism() { return confParser.intConf().option(FlinkWriteOptions.WRITE_PARALLELISM.key()).parseOptional(); } + + /** + * NOTE: This may be removed or changed in a future release. This value specifies the interval for + * refreshing the table instances in sink writer subtasks. If not specified then the default + * behavior is to not refresh the table. 
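+   * <p>A sketch of enabling periodic refresh through the sink write options, assuming the sink
+   * builder's {@code set} method for passing write options:
+   *
+   * <pre>{@code
+   * FlinkSink.forRowData(dataStream)
+   *     .tableLoader(tableLoader)
+   *     .set(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key(), "30 min")
+   *     .append();
+   * }</pre>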
+ * + * @return the interval for refreshing the table in sink writer subtasks + */ + @Experimental + public Duration tableRefreshInterval() { + return confParser + .durationConf() + .option(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key()) + .flinkConfig(FlinkWriteOptions.TABLE_REFRESH_INTERVAL) + .parseOptional(); + } } diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java index ba0931318e0d..df73f2e09cac 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; +import org.apache.flink.annotation.Experimental; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ConfigOptions; import org.apache.iceberg.SnapshotRef; @@ -64,4 +66,8 @@ private FlinkWriteOptions() {} public static final ConfigOption WRITE_PARALLELISM = ConfigOptions.key("write-parallelism").intType().noDefaultValue(); + + @Experimental + public static final ConfigOption TABLE_REFRESH_INTERVAL = + ConfigOptions.key("table-refresh-interval").durationType().noDefaultValue(); } diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java index f18c5ccda1f6..da509451fee7 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java @@ -38,6 +38,8 @@ public interface TableLoader extends Closeable, Serializable, Cloneable { void open(); + boolean isOpen(); + Table loadTable(); /** Clone a TableLoader */ @@ -75,6 +77,11 @@ public void open() { tables = new HadoopTables(hadoopConf.get()); } + @Override + public boolean isOpen() { + return tables != null; + } + @Override public Table loadTable() { FlinkEnvironmentContext.init(); @@ -115,6 +122,11 @@ public void open() { catalog = catalogLoader.loadCatalog(); } + @Override + public boolean isOpen() { + return catalog != null; + } + @Override public Table loadTable() { FlinkEnvironmentContext.init(); @@ -126,6 +138,8 @@ public void close() throws IOException { if (catalog instanceof Closeable) { ((Closeable) catalog).close(); } + + catalog = null; } @Override diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java new file mode 100644 index 000000000000..e9f9786f9190 --- /dev/null +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.sink; + +import java.time.Duration; +import org.apache.flink.util.Preconditions; +import org.apache.iceberg.SerializableTable; +import org.apache.iceberg.Table; +import org.apache.iceberg.flink.TableLoader; +import org.apache.iceberg.util.DateTimeUtil; +import org.apache.iceberg.util.SerializableSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A table loader that will only reload a table after a certain interval has passed. WARNING: This + * table loader should be used carefully when used with writer tasks. It could result in heavy load + * on a catalog for jobs with many writers. + */ +class CachingTableSupplier implements SerializableSupplier { + + private static final Logger LOG = LoggerFactory.getLogger(CachingTableSupplier.class); + + private final Table initialTable; + private final TableLoader tableLoader; + private final Duration tableRefreshInterval; + private long lastLoadTimeMillis; + private transient Table table; + + CachingTableSupplier( + SerializableTable initialTable, TableLoader tableLoader, Duration tableRefreshInterval) { + Preconditions.checkArgument(initialTable != null, "initialTable cannot be null"); + Preconditions.checkArgument(tableLoader != null, "tableLoader cannot be null"); + Preconditions.checkArgument( + tableRefreshInterval != null, "tableRefreshInterval cannot be null"); + this.initialTable = initialTable; + this.table = initialTable; + this.tableLoader = tableLoader; + this.tableRefreshInterval = tableRefreshInterval; + this.lastLoadTimeMillis = System.currentTimeMillis(); + } + + @Override + public Table get() { + if (table == null) { + this.table = initialTable; + } + return table; + } + + Table initialTable() { + return initialTable; + } + + void refreshTable() { + if (System.currentTimeMillis() > lastLoadTimeMillis + tableRefreshInterval.toMillis()) { + try { + if (!tableLoader.isOpen()) { + tableLoader.open(); + } + + this.table = tableLoader.loadTable(); + this.lastLoadTimeMillis = System.currentTimeMillis(); + + LOG.info( + "Table {} reloaded, next min load time threshold is {}", + table.name(), + DateTimeUtil.formatTimestampMillis( + lastLoadTimeMillis + tableRefreshInterval.toMillis())); + } catch (Exception e) { + LOG.warn("An error occurred reloading table {}, table was not reloaded", table.name(), e); + } + } + } +} diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java index 00d55f937cc4..c7e8a2dea7cb 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java @@ -24,13 +24,11 @@ import java.util.function.Supplier; import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestFiles; import org.apache.iceberg.ManifestWriter; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; -import org.apache.iceberg.TableOperations; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; @@ -64,16 +62,14 @@ static List readDataFiles( } static ManifestOutputFileFactory createOutputFileFactory( - Table table, 
String flinkJobId, String operatorUniqueId, int subTaskId, long attemptNumber) { - TableOperations ops = ((HasTableOperations) table).operations(); + Supplier<Table>
tableSupplier, + Map tableProps, + String flinkJobId, + String operatorUniqueId, + int subTaskId, + long attemptNumber) { return new ManifestOutputFileFactory( - ops, - table.io(), - table.properties(), - flinkJobId, - operatorUniqueId, - subTaskId, - attemptNumber); + tableSupplier, tableProps, flinkJobId, operatorUniqueId, subTaskId, attemptNumber); } /** diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java index 023702790116..58828799255d 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.Set; @@ -67,6 +68,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; +import org.apache.iceberg.util.SerializableSupplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -330,7 +332,10 @@ private DataStreamSink chainIcebergOperators() { DataStream rowDataInput = inputCreator.apply(uidPrefix); if (table == null) { - tableLoader.open(); + if (!tableLoader.isOpen()) { + tableLoader.open(); + } + try (TableLoader loader = tableLoader) { this.table = loader.loadTable(); } catch (IOException e) { @@ -462,8 +467,19 @@ private SingleOutputStreamOperator appendWriter( } } + SerializableTable serializableTable = (SerializableTable) SerializableTable.copyOf(table); + Duration tableRefreshInterval = flinkWriteConf.tableRefreshInterval(); + + SerializableSupplier
<Table> tableSupplier; + if (tableRefreshInterval != null) { + tableSupplier = + new CachingTableSupplier(serializableTable, tableLoader, tableRefreshInterval); + } else { + tableSupplier = () -> serializableTable; + } + IcebergStreamWriter<RowData> streamWriter = - createStreamWriter(table, flinkWriteConf, flinkRowType, equalityFieldIds); + createStreamWriter(tableSupplier, flinkWriteConf, flinkRowType, equalityFieldIds); int parallelism = flinkWriteConf.writeParallelism() == null @@ -580,24 +596,25 @@ static RowType toFlinkRowType(Schema schema, TableSchema requestedSchema) { } static IcebergStreamWriter<RowData> createStreamWriter( - Table table, + SerializableSupplier<Table>
tableSupplier, FlinkWriteConf flinkWriteConf, RowType flinkRowType, List equalityFieldIds) { - Preconditions.checkArgument(table != null, "Iceberg table shouldn't be null"); + Preconditions.checkArgument(tableSupplier != null, "Iceberg table supplier shouldn't be null"); - Table serializableTable = SerializableTable.copyOf(table); + Table initTable = tableSupplier.get(); FileFormat format = flinkWriteConf.dataFileFormat(); TaskWriterFactory taskWriterFactory = new RowDataTaskWriterFactory( - serializableTable, + tableSupplier, flinkRowType, flinkWriteConf.targetDataFileSize(), format, - writeProperties(table, format, flinkWriteConf), + writeProperties(initTable, format, flinkWriteConf), equalityFieldIds, flinkWriteConf.upsertMode()); - return new IcebergStreamWriter<>(table.name(), taskWriterFactory); + + return new IcebergStreamWriter<>(initTable.name(), taskWriterFactory); } /** diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java index 3805ab298428..b9bceaa9311d 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java @@ -160,7 +160,7 @@ public void initializeState(StateInitializationContext context) throws Exception int attemptId = getRuntimeContext().getAttemptNumber(); this.manifestOutputFileFactory = FlinkManifestUtil.createOutputFileFactory( - table, flinkJobId, operatorUniqueId, subTaskId, attemptId); + () -> table, table.properties(), flinkJobId, operatorUniqueId, subTaskId, attemptId); this.maxCommittedCheckpointId = INITIAL_CHECKPOINT_ID; this.checkpointsState = context.getOperatorStateStore().getListState(STATE_DESCRIPTOR); @@ -247,6 +247,9 @@ public void notifyCheckpointComplete(long checkpointId) throws Exception { checkpointId, maxCommittedCheckpointId); } + + // reload the table in case new configuration is needed + this.table = tableLoader.loadTable(); } private void commitUpToCheckpoint( diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java index 045e45a4ceae..da5e6e7627ae 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java @@ -20,9 +20,11 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import org.apache.iceberg.FileFormat; +import org.apache.iceberg.HasTableOperations; +import org.apache.iceberg.Table; import org.apache.iceberg.TableOperations; -import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.base.Strings; @@ -31,8 +33,7 @@ class ManifestOutputFileFactory { // properties. static final String FLINK_MANIFEST_LOCATION = "flink.manifests.location"; - private final TableOperations ops; - private final FileIO io; + private final Supplier
<Table> tableSupplier; private final Map<String, String> props; private final String flinkJobId; private final String operatorUniqueId; @@ -41,15 +42,13 @@ class ManifestOutputFileFactory { private final AtomicInteger fileCount = new AtomicInteger(0); ManifestOutputFileFactory( - TableOperations ops, - FileIO io, + Supplier<Table>
tableSupplier, Map props, String flinkJobId, String operatorUniqueId, int subTaskId, long attemptNumber) { - this.ops = ops; - this.io = io; + this.tableSupplier = tableSupplier; this.props = props; this.flinkJobId = flinkJobId; this.operatorUniqueId = operatorUniqueId; @@ -71,6 +70,7 @@ private String generatePath(long checkpointId) { OutputFile create(long checkpointId) { String flinkManifestDir = props.get(FLINK_MANIFEST_LOCATION); + TableOperations ops = ((HasTableOperations) tableSupplier.get()).operations(); String newManifestFullPath; if (Strings.isNullOrEmpty(flinkManifestDir)) { @@ -81,7 +81,7 @@ OutputFile create(long checkpointId) { String.format("%s/%s", stripTrailingSlash(flinkManifestDir), generatePath(checkpointId)); } - return io.newOutputFile(newManifestFullPath); + return tableSupplier.get().io().newOutputFile(newManifestFullPath); } private static String stripTrailingSlash(String path) { diff --git a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java index c624eb3f0276..67422a1afeb1 100644 --- a/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java +++ b/flink/v1.15/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.Map; +import java.util.function.Supplier; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.logical.RowType; import org.apache.iceberg.FileFormat; @@ -38,13 +39,13 @@ import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.util.ArrayUtil; +import org.apache.iceberg.util.SerializableSupplier; public class RowDataTaskWriterFactory implements TaskWriterFactory { - private final Table table; + private final Supplier
<Table> tableSupplier; private final Schema schema; private final RowType flinkSchema; private final PartitionSpec spec; - private final FileIO io; private final long targetFileSizeBytes; private final FileFormat format; private final List<Integer> equalityFieldIds; @@ -61,11 +62,37 @@ public RowDataTaskWriterFactory( Map<String, String> writeProperties, List<Integer> equalityFieldIds, boolean upsert) { - this.table = table; + this( + () -> table, + flinkSchema, + targetFileSizeBytes, + format, + writeProperties, + equalityFieldIds, + upsert); + } + + public RowDataTaskWriterFactory( + SerializableSupplier<Table>
tableSupplier, + RowType flinkSchema, + long targetFileSizeBytes, + FileFormat format, + Map writeProperties, + List equalityFieldIds, + boolean upsert) { + this.tableSupplier = tableSupplier; + + Table table; + if (tableSupplier instanceof CachingTableSupplier) { + // rely on the initial table metadata for schema, etc., until schema evolution is supported + table = ((CachingTableSupplier) tableSupplier).initialTable(); + } else { + table = tableSupplier.get(); + } + this.schema = table.schema(); this.flinkSchema = flinkSchema; this.spec = table.spec(); - this.io = table.io(); this.targetFileSizeBytes = targetFileSizeBytes; this.format = format; this.equalityFieldIds = equalityFieldIds; @@ -107,8 +134,21 @@ public RowDataTaskWriterFactory( @Override public void initialize(int taskId, int attemptId) { + Table table; + if (tableSupplier instanceof CachingTableSupplier) { + // rely on the initial table metadata for schema, etc., until schema evolution is supported + table = ((CachingTableSupplier) tableSupplier).initialTable(); + } else { + table = tableSupplier.get(); + } + + refreshTable(); + this.outputFileFactory = - OutputFileFactory.builderFor(table, taskId, attemptId).format(format).build(); + OutputFileFactory.builderFor(table, taskId, attemptId) + .format(format) + .ioSupplier(() -> tableSupplier.get().io()) + .build(); } @Override @@ -117,18 +157,25 @@ public TaskWriter create() { outputFileFactory, "The outputFileFactory shouldn't be null if we have invoked the initialize()."); + refreshTable(); + if (equalityFieldIds == null || equalityFieldIds.isEmpty()) { // Initialize a task writer to write INSERT only. if (spec.isUnpartitioned()) { return new UnpartitionedWriter<>( - spec, format, appenderFactory, outputFileFactory, io, targetFileSizeBytes); + spec, + format, + appenderFactory, + outputFileFactory, + tableSupplier.get().io(), + targetFileSizeBytes); } else { return new RowDataPartitionedFanoutWriter( spec, format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema); @@ -141,7 +188,7 @@ public TaskWriter create() { format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema, @@ -153,7 +200,7 @@ public TaskWriter create() { format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema, @@ -163,6 +210,12 @@ public TaskWriter create() { } } + void refreshTable() { + if (tableSupplier instanceof CachingTableSupplier) { + ((CachingTableSupplier) tableSupplier).refreshTable(); + } + } + private static class RowDataPartitionedFanoutWriter extends PartitionedFanoutWriter { private final PartitionKey partitionKey; diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java index d907a58ec2bc..7f47b70286f3 100644 --- a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.flink; +import static org.assertj.core.api.Assertions.assertThat; + import java.util.Arrays; import java.util.Collections; import java.util.Map; @@ -48,7 +50,6 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; @@ -121,12 +122,10 @@ public void testCreateTable() throws TableNotExistException { Assert.assertEquals( new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( TableSchema.builder().field("id", DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); } @Test @@ -176,7 +175,7 @@ public void testCreateTableIfNotExists() { sql("CREATE TABLE tl(id BIGINT)"); // Assert that table does exist. - Assert.assertEquals(Maps.newHashMap(), table("tl").properties()); + assertThat(table("tl")).isNotNull(); sql("DROP TABLE tl"); AssertHelpers.assertThrows( @@ -186,15 +185,13 @@ public void testCreateTableIfNotExists() { () -> table("tl")); sql("CREATE TABLE IF NOT EXISTS tl(id BIGINT)"); - Assert.assertEquals(Maps.newHashMap(), table("tl").properties()); + assertThat(table("tl").properties()).doesNotContainKey("key"); - final Map expectedProperties = ImmutableMap.of("key", "value"); table("tl").updateProperties().set("key", "value").commit(); - Assert.assertEquals(expectedProperties, table("tl").properties()); + assertThat(table("tl").properties()).containsEntry("key", "value"); sql("CREATE TABLE IF NOT EXISTS tl(id BIGINT)"); - Assert.assertEquals( - "Should still be the old table.", expectedProperties, table("tl").properties()); + assertThat(table("tl").properties()).containsEntry("key", "value"); } @Test @@ -206,12 +203,10 @@ public void testCreateTableLike() throws TableNotExistException { Assert.assertEquals( new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl2"); Assert.assertEquals( TableSchema.builder().field("id", DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); } @Test @@ -226,7 +221,6 @@ public void testCreateTableLocation() { new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); Assert.assertEquals("file:///tmp/location", table.location()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); } @Test @@ -242,7 +236,6 @@ public void testCreatePartitionTable() throws TableNotExistException { table.schema().asStruct()); Assert.assertEquals( PartitionSpec.builderFor(table.schema()).identity("dt").build(), table.spec()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( @@ -251,7 +244,6 @@ public void testCreatePartitionTable() throws TableNotExistException { .field("dt", DataTypes.STRING()) .build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); Assert.assertEquals(Collections.singletonList("dt"), catalogTable.getPartitionKeys()); } @@ -304,7 +296,6 @@ public void testLoadTransformPartitionTable() throws TableNotExistException { CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( TableSchema.builder().field("id", 
DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); Assert.assertEquals(Collections.emptyList(), catalogTable.getPartitionKeys()); } @@ -317,12 +308,12 @@ public void testAlterTable() throws TableNotExistException { // new sql("ALTER TABLE tl SET('newK'='newV')"); properties.put("newK", "newV"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // update old sql("ALTER TABLE tl SET('oldK'='oldV2')"); properties.put("oldK", "oldV2"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // remove property CatalogTable catalogTable = catalogTable("tl"); @@ -331,7 +322,7 @@ public void testAlterTable() throws TableNotExistException { .getCatalog(getTableEnv().getCurrentCatalog()) .get() .alterTable(new ObjectPath(DATABASE, "tl"), catalogTable.copy(properties), false); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); } @Test @@ -343,12 +334,12 @@ public void testAlterTableWithPrimaryKey() throws TableNotExistException { // new sql("ALTER TABLE tl SET('newK'='newV')"); properties.put("newK", "newV"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // update old sql("ALTER TABLE tl SET('oldK'='oldV2')"); properties.put("oldK", "oldV2"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // remove property CatalogTable catalogTable = catalogTable("tl"); @@ -357,7 +348,7 @@ public void testAlterTableWithPrimaryKey() throws TableNotExistException { .getCatalog(getTableEnv().getCurrentCatalog()) .get() .alterTable(new ObjectPath(DATABASE, "tl"), catalogTable.copy(properties), false); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); } @Test diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java new file mode 100644 index 000000000000..4b6ac25ab8e3 --- /dev/null +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.flink; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Duration; +import java.util.Map; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +public class TestFlinkConfParser { + + @Test + public void testDurationConf() { + Map writeOptions = ImmutableMap.of("write-prop", "111s"); + + ConfigOption configOption = + ConfigOptions.key("conf-prop").durationType().noDefaultValue(); + Configuration flinkConf = new Configuration(); + flinkConf.setString(configOption.key(), "222s"); + + Table table = mock(Table.class); + when(table.properties()).thenReturn(ImmutableMap.of("table-prop", "333s")); + + FlinkConfParser confParser = new FlinkConfParser(table, writeOptions, flinkConf); + Duration defaultVal = Duration.ofMillis(999); + + Duration result = + confParser.durationConf().option("write-prop").defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(111)); + + result = confParser.durationConf().flinkConfig(configOption).defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(222)); + + result = confParser.durationConf().tableProperty("table-prop").defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(333)); + } +} diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java index 93e97d5aa3bc..4ad302dde436 100644 --- a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java @@ -36,6 +36,11 @@ public TestTableLoader(String dir) { @Override public void open() {} + @Override + public boolean isOpen() { + return true; + } + @Override public Table loadTable() { return TestTables.load(dir, "test"); diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java new file mode 100644 index 000000000000..360db658cd2f --- /dev/null +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.flink.sink; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Duration; +import java.util.concurrent.TimeUnit; +import org.apache.iceberg.SerializableTable; +import org.apache.iceberg.Table; +import org.apache.iceberg.flink.TableLoader; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.Test; + +public class TestCachingTableSupplier { + + @Test + public void testCheckArguments() { + SerializableTable initialTable = mock(SerializableTable.class); + + Table loadedTable = mock(Table.class); + TableLoader tableLoader = mock(TableLoader.class); + when(tableLoader.loadTable()).thenReturn(loadedTable); + + new CachingTableSupplier(initialTable, tableLoader, Duration.ofMillis(100)); + + assertThatThrownBy(() -> new CachingTableSupplier(initialTable, tableLoader, null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("tableRefreshInterval cannot be null"); + assertThatThrownBy(() -> new CachingTableSupplier(null, tableLoader, Duration.ofMillis(100))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("initialTable cannot be null"); + assertThatThrownBy(() -> new CachingTableSupplier(initialTable, null, Duration.ofMillis(100))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("tableLoader cannot be null"); + } + + @Test + public void testTableReload() { + SerializableTable initialTable = mock(SerializableTable.class); + + Table loadedTable = mock(Table.class); + TableLoader tableLoader = mock(TableLoader.class); + when(tableLoader.loadTable()).thenReturn(loadedTable); + + CachingTableSupplier cachingTableSupplier = + new CachingTableSupplier(initialTable, tableLoader, Duration.ofMillis(100)); + + // refresh shouldn't do anything as the min reload interval hasn't passed + cachingTableSupplier.refreshTable(); + assertThat(cachingTableSupplier.get()).isEqualTo(initialTable); + + // refresh after waiting past the min reload interval + Awaitility.await() + .atLeast(100, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + cachingTableSupplier.refreshTable(); + assertThat(cachingTableSupplier.get()).isEqualTo(loadedTable); + }); + } +} diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java index 49f472b7325e..d9d57fb7107e 100644 --- a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java @@ -134,11 +134,11 @@ public void testCompressionParquet() throws Exception { if (initProperties.get(TableProperties.PARQUET_COMPRESSION) == null) { Assert.assertEquals( - TableProperties.PARQUET_COMPRESSION_DEFAULT, + TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0, resultProperties.get(TableProperties.PARQUET_COMPRESSION)); Assert.assertEquals( TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT, - resultProperties.get(TableProperties.PARQUET_COMPRESSION_LEVEL)); + resultProperties.get(TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT)); } else { Assert.assertEquals( initProperties.get(TableProperties.PARQUET_COMPRESSION), @@ -215,7 +215,7 @@ private static OneInputStreamOperatorTestHarness createIce icebergTable, override, new org.apache.flink.configuration.Configuration()); 
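The call sites below move to the supplier-based createStreamWriter signature. Outside of tests, the same shape lets a refreshing supplier be threaded through the task writer factory; a hedged sketch only (the variable names, 128 MB target size and PARQUET format are illustrative, not taken from this patch):

    // sketch: constructing RowDataTaskWriterFactory with a refreshing table supplier
    SerializableTable initial = (SerializableTable) SerializableTable.copyOf(table);
    SerializableSupplier<Table> tableSupplier =
        new CachingTableSupplier(initial, tableLoader, Duration.ofMinutes(10));

    RowDataTaskWriterFactory factory =
        new RowDataTaskWriterFactory(
            tableSupplier,
            flinkRowType,          // RowType derived from the Iceberg schema
            134217728L,            // target data file size in bytes (128 MB)
            FileFormat.PARQUET,
            table.properties(),    // write properties
            null,                  // no equality field ids -> INSERT-only writers
            false);                // upsert disabled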
IcebergStreamWriter streamWriter = - FlinkSink.createStreamWriter(icebergTable, flinkWriteConfig, flinkRowType, null); + FlinkSink.createStreamWriter(() -> icebergTable, flinkWriteConfig, flinkRowType, null); OneInputStreamOperatorTestHarness harness = new OneInputStreamOperatorTestHarness<>(streamWriter, 1, 1, 0); diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java index 23beb19a72f2..d771fe140b1a 100644 --- a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.TableSchema; @@ -390,4 +391,28 @@ public void testOverrideWriteConfigWithUnknownFileFormat() { return null; }); } + + @Test + public void testWriteRowWithTableRefreshInterval() throws Exception { + List rows = Lists.newArrayList(Row.of(1, "hello"), Row.of(2, "world"), Row.of(3, "foo")); + DataStream dataStream = + env.addSource(createBoundedSource(rows), ROW_TYPE_INFO) + .map(CONVERTER::toInternal, FlinkCompatibilityUtil.toTypeInfo(SimpleDataUtil.ROW_TYPE)); + + Configuration flinkConf = new Configuration(); + flinkConf.setString(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key(), "100ms"); + + FlinkSink.forRowData(dataStream) + .table(table) + .tableLoader(tableLoader) + .flinkConf(flinkConf) + .writeParallelism(parallelism) + .append(); + + // Execute the program. + env.execute("Test Iceberg DataStream"); + + // Assert the iceberg table's records. 
+ SimpleDataUtil.assertTableRows(table, convertToRowData(rows)); + } } diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java index f171485a90f7..ce1f208a4b07 100644 --- a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java @@ -34,7 +34,6 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileFormat; -import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestFiles; import org.apache.iceberg.Table; @@ -94,7 +93,8 @@ public void testIO() throws IOException { String operatorId = newOperatorUniqueId(); for (long checkpointId = 1; checkpointId <= 3; checkpointId++) { ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); final long curCkpId = checkpointId; List dataFiles = generateDataFiles(10); @@ -135,14 +135,7 @@ public void testUserProvidedManifestLocation() throws IOException { Map props = ImmutableMap.of(FLINK_MANIFEST_LOCATION, userProvidedFolder.getAbsolutePath() + "///"); ManifestOutputFileFactory factory = - new ManifestOutputFileFactory( - ((HasTableOperations) table).operations(), - table.io(), - props, - flinkJobId, - operatorId, - 1, - 1); + new ManifestOutputFileFactory(() -> table, props, flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(5); DeltaManifests deltaManifests = @@ -177,7 +170,8 @@ public void testVersionedSerializer() throws IOException { String flinkJobId = newFlinkJobId(); String operatorId = newOperatorUniqueId(); ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(10); List eqDeleteFiles = generateEqDeleteFiles(10); @@ -214,7 +208,8 @@ public void testCompatibility() throws IOException { String flinkJobId = newFlinkJobId(); String operatorId = newOperatorUniqueId(); ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(10); ManifestFile manifest = diff --git a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java index fa69c5d4d1fd..0968f89f55e0 100644 --- a/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java +++ b/flink/v1.15/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java @@ -376,7 +376,7 @@ private OneInputStreamOperatorTestHarness createIcebergStr icebergTable, Maps.newHashMap(), new org.apache.flink.configuration.Configuration()); IcebergStreamWriter streamWriter = - FlinkSink.createStreamWriter(icebergTable, flinkWriteConfig, flinkRowType, null); + FlinkSink.createStreamWriter(() -> icebergTable, flinkWriteConfig, flinkRowType, null); OneInputStreamOperatorTestHarness harness = new 
OneInputStreamOperatorTestHarness<>(streamWriter, 1, 1, 0); diff --git a/flink/v1.16/build.gradle b/flink/v1.16/build.gradle index bdddc8bf3e74..2d5e6f326b01 100644 --- a/flink/v1.16/build.gradle +++ b/flink/v1.16/build.gradle @@ -114,6 +114,11 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { } testImplementation libs.awaitility + testImplementation libs.assertj.core + } + + test { + useJUnitPlatform() } } diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java index 65717089d0d8..7167859e600c 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java @@ -18,11 +18,13 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.function.Function; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.util.TimeUtils; import org.apache.iceberg.Table; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -59,6 +61,10 @@ public StringConfParser stringConf() { return new StringConfParser(); } + public DurationConfParser durationConf() { + return new DurationConfParser(); + } + class BooleanConfParser extends ConfParser { private Boolean defaultValue; @@ -180,6 +186,29 @@ public E parseOptional() { } } + class DurationConfParser extends ConfParser { + private Duration defaultValue; + + @Override + protected DurationConfParser self() { + return this; + } + + public DurationConfParser defaultValue(Duration value) { + this.defaultValue = value; + return self(); + } + + public Duration parse() { + Preconditions.checkArgument(defaultValue != null, "Default value cannot be null"); + return parse(TimeUtils::parseDuration, defaultValue); + } + + public Duration parseOptional() { + return parse(TimeUtils::parseDuration, null); + } + } + abstract class ConfParser { private final List optionNames = Lists.newArrayList(); private String tablePropertyName; diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java index aba23389f2fe..ca7b1120bc81 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java @@ -18,7 +18,9 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; import java.util.Map; +import org.apache.flink.annotation.Experimental; import org.apache.flink.configuration.ReadableConfig; import org.apache.iceberg.DistributionMode; import org.apache.iceberg.FileFormat; @@ -184,4 +186,20 @@ public String branch() { public Integer writeParallelism() { return confParser.intConf().option(FlinkWriteOptions.WRITE_PARALLELISM.key()).parseOptional(); } + + /** + * NOTE: This may be removed or changed in a future release. This value specifies the interval for + * refreshing the table instances in sink writer subtasks. If not specified then the default + * behavior is to not refresh the table. 
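In practice the interval is supplied through the new write option; a minimal sketch, assuming a DataStream<RowData> named dataStream and a TableLoader named tableLoader (mirroring the builder calls used elsewhere in this patch):

    // sketch: enabling periodic table refresh for sink writers
    Configuration flinkConf = new Configuration();
    flinkConf.setString(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key(), "5min");

    FlinkSink.forRowData(dataStream)
        .tableLoader(tableLoader)
        .flinkConf(flinkConf)
        .append();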
+ * + * @return the interval for refreshing the table in sink writer subtasks + */ + @Experimental + public Duration tableRefreshInterval() { + return confParser + .durationConf() + .option(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key()) + .flinkConfig(FlinkWriteOptions.TABLE_REFRESH_INTERVAL) + .parseOptional(); + } } diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java index ba0931318e0d..df73f2e09cac 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; +import org.apache.flink.annotation.Experimental; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ConfigOptions; import org.apache.iceberg.SnapshotRef; @@ -64,4 +66,8 @@ private FlinkWriteOptions() {} public static final ConfigOption WRITE_PARALLELISM = ConfigOptions.key("write-parallelism").intType().noDefaultValue(); + + @Experimental + public static final ConfigOption TABLE_REFRESH_INTERVAL = + ConfigOptions.key("table-refresh-interval").durationType().noDefaultValue(); } diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java index f18c5ccda1f6..da509451fee7 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java @@ -38,6 +38,8 @@ public interface TableLoader extends Closeable, Serializable, Cloneable { void open(); + boolean isOpen(); + Table loadTable(); /** Clone a TableLoader */ @@ -75,6 +77,11 @@ public void open() { tables = new HadoopTables(hadoopConf.get()); } + @Override + public boolean isOpen() { + return tables != null; + } + @Override public Table loadTable() { FlinkEnvironmentContext.init(); @@ -115,6 +122,11 @@ public void open() { catalog = catalogLoader.loadCatalog(); } + @Override + public boolean isOpen() { + return catalog != null; + } + @Override public Table loadTable() { FlinkEnvironmentContext.init(); @@ -126,6 +138,8 @@ public void close() throws IOException { if (catalog instanceof Closeable) { ((Closeable) catalog).close(); } + + catalog = null; } @Override diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java new file mode 100644 index 000000000000..e9f9786f9190 --- /dev/null +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.sink; + +import java.time.Duration; +import org.apache.flink.util.Preconditions; +import org.apache.iceberg.SerializableTable; +import org.apache.iceberg.Table; +import org.apache.iceberg.flink.TableLoader; +import org.apache.iceberg.util.DateTimeUtil; +import org.apache.iceberg.util.SerializableSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A table loader that will only reload a table after a certain interval has passed. WARNING: This + * table loader should be used carefully when used with writer tasks. It could result in heavy load + * on a catalog for jobs with many writers. + */ +class CachingTableSupplier implements SerializableSupplier
{ + + private static final Logger LOG = LoggerFactory.getLogger(CachingTableSupplier.class); + + private final Table initialTable; + private final TableLoader tableLoader; + private final Duration tableRefreshInterval; + private long lastLoadTimeMillis; + private transient Table table; + + CachingTableSupplier( + SerializableTable initialTable, TableLoader tableLoader, Duration tableRefreshInterval) { + Preconditions.checkArgument(initialTable != null, "initialTable cannot be null"); + Preconditions.checkArgument(tableLoader != null, "tableLoader cannot be null"); + Preconditions.checkArgument( + tableRefreshInterval != null, "tableRefreshInterval cannot be null"); + this.initialTable = initialTable; + this.table = initialTable; + this.tableLoader = tableLoader; + this.tableRefreshInterval = tableRefreshInterval; + this.lastLoadTimeMillis = System.currentTimeMillis(); + } + + @Override + public Table get() { + if (table == null) { + this.table = initialTable; + } + return table; + } + + Table initialTable() { + return initialTable; + } + + void refreshTable() { + if (System.currentTimeMillis() > lastLoadTimeMillis + tableRefreshInterval.toMillis()) { + try { + if (!tableLoader.isOpen()) { + tableLoader.open(); + } + + this.table = tableLoader.loadTable(); + this.lastLoadTimeMillis = System.currentTimeMillis(); + + LOG.info( + "Table {} reloaded, next min load time threshold is {}", + table.name(), + DateTimeUtil.formatTimestampMillis( + lastLoadTimeMillis + tableRefreshInterval.toMillis())); + } catch (Exception e) { + LOG.warn("An error occurred reloading table {}, table was not reloaded", table.name(), e); + } + } + } +} diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java index 00d55f937cc4..c7e8a2dea7cb 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java @@ -24,13 +24,11 @@ import java.util.function.Supplier; import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestFiles; import org.apache.iceberg.ManifestWriter; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; -import org.apache.iceberg.TableOperations; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; @@ -64,16 +62,14 @@ static List readDataFiles( } static ManifestOutputFileFactory createOutputFileFactory( - Table table, String flinkJobId, String operatorUniqueId, int subTaskId, long attemptNumber) { - TableOperations ops = ((HasTableOperations) table).operations(); + Supplier
tableSupplier, + Map tableProps, + String flinkJobId, + String operatorUniqueId, + int subTaskId, + long attemptNumber) { return new ManifestOutputFileFactory( - ops, - table.io(), - table.properties(), - flinkJobId, - operatorUniqueId, - subTaskId, - attemptNumber); + tableSupplier, tableProps, flinkJobId, operatorUniqueId, subTaskId, attemptNumber); } /** diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java index 023702790116..58828799255d 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.Set; @@ -67,6 +68,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; +import org.apache.iceberg.util.SerializableSupplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -330,7 +332,10 @@ private DataStreamSink chainIcebergOperators() { DataStream rowDataInput = inputCreator.apply(uidPrefix); if (table == null) { - tableLoader.open(); + if (!tableLoader.isOpen()) { + tableLoader.open(); + } + try (TableLoader loader = tableLoader) { this.table = loader.loadTable(); } catch (IOException e) { @@ -462,8 +467,19 @@ private SingleOutputStreamOperator appendWriter( } } + SerializableTable serializableTable = (SerializableTable) SerializableTable.copyOf(table); + Duration tableRefreshInterval = flinkWriteConf.tableRefreshInterval(); + + SerializableSupplier
<Table> tableSupplier; + if (tableRefreshInterval != null) { + tableSupplier = + new CachingTableSupplier(serializableTable, tableLoader, tableRefreshInterval); + } else { + tableSupplier = () -> serializableTable; + } + IcebergStreamWriter<RowData> streamWriter = - createStreamWriter(table, flinkWriteConf, flinkRowType, equalityFieldIds); + createStreamWriter(tableSupplier, flinkWriteConf, flinkRowType, equalityFieldIds); int parallelism = flinkWriteConf.writeParallelism() == null @@ -580,24 +596,25 @@ static RowType toFlinkRowType(Schema schema, TableSchema requestedSchema) { } static IcebergStreamWriter<RowData> createStreamWriter( - Table table, + SerializableSupplier<Table>
tableSupplier, FlinkWriteConf flinkWriteConf, RowType flinkRowType, List equalityFieldIds) { - Preconditions.checkArgument(table != null, "Iceberg table shouldn't be null"); + Preconditions.checkArgument(tableSupplier != null, "Iceberg table supplier shouldn't be null"); - Table serializableTable = SerializableTable.copyOf(table); + Table initTable = tableSupplier.get(); FileFormat format = flinkWriteConf.dataFileFormat(); TaskWriterFactory taskWriterFactory = new RowDataTaskWriterFactory( - serializableTable, + tableSupplier, flinkRowType, flinkWriteConf.targetDataFileSize(), format, - writeProperties(table, format, flinkWriteConf), + writeProperties(initTable, format, flinkWriteConf), equalityFieldIds, flinkWriteConf.upsertMode()); - return new IcebergStreamWriter<>(table.name(), taskWriterFactory); + + return new IcebergStreamWriter<>(initTable.name(), taskWriterFactory); } /** diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java index 3805ab298428..b9bceaa9311d 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java @@ -160,7 +160,7 @@ public void initializeState(StateInitializationContext context) throws Exception int attemptId = getRuntimeContext().getAttemptNumber(); this.manifestOutputFileFactory = FlinkManifestUtil.createOutputFileFactory( - table, flinkJobId, operatorUniqueId, subTaskId, attemptId); + () -> table, table.properties(), flinkJobId, operatorUniqueId, subTaskId, attemptId); this.maxCommittedCheckpointId = INITIAL_CHECKPOINT_ID; this.checkpointsState = context.getOperatorStateStore().getListState(STATE_DESCRIPTOR); @@ -247,6 +247,9 @@ public void notifyCheckpointComplete(long checkpointId) throws Exception { checkpointId, maxCommittedCheckpointId); } + + // reload the table in case new configuration is needed + this.table = tableLoader.loadTable(); } private void commitUpToCheckpoint( diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java index 045e45a4ceae..da5e6e7627ae 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java @@ -20,9 +20,11 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import org.apache.iceberg.FileFormat; +import org.apache.iceberg.HasTableOperations; +import org.apache.iceberg.Table; import org.apache.iceberg.TableOperations; -import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.base.Strings; @@ -31,8 +33,7 @@ class ManifestOutputFileFactory { // properties. static final String FLINK_MANIFEST_LOCATION = "flink.manifests.location"; - private final TableOperations ops; - private final FileIO io; + private final Supplier
<Table> tableSupplier; private final Map<String, String> props; private final String flinkJobId; private final String operatorUniqueId; @@ -41,15 +42,13 @@ class ManifestOutputFileFactory { private final AtomicInteger fileCount = new AtomicInteger(0); ManifestOutputFileFactory( - TableOperations ops, - FileIO io, + Supplier<Table>
tableSupplier, Map props, String flinkJobId, String operatorUniqueId, int subTaskId, long attemptNumber) { - this.ops = ops; - this.io = io; + this.tableSupplier = tableSupplier; this.props = props; this.flinkJobId = flinkJobId; this.operatorUniqueId = operatorUniqueId; @@ -71,6 +70,7 @@ private String generatePath(long checkpointId) { OutputFile create(long checkpointId) { String flinkManifestDir = props.get(FLINK_MANIFEST_LOCATION); + TableOperations ops = ((HasTableOperations) tableSupplier.get()).operations(); String newManifestFullPath; if (Strings.isNullOrEmpty(flinkManifestDir)) { @@ -81,7 +81,7 @@ OutputFile create(long checkpointId) { String.format("%s/%s", stripTrailingSlash(flinkManifestDir), generatePath(checkpointId)); } - return io.newOutputFile(newManifestFullPath); + return tableSupplier.get().io().newOutputFile(newManifestFullPath); } private static String stripTrailingSlash(String path) { diff --git a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java index c624eb3f0276..67422a1afeb1 100644 --- a/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java +++ b/flink/v1.16/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.Map; +import java.util.function.Supplier; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.logical.RowType; import org.apache.iceberg.FileFormat; @@ -38,13 +39,13 @@ import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.util.ArrayUtil; +import org.apache.iceberg.util.SerializableSupplier; public class RowDataTaskWriterFactory implements TaskWriterFactory { - private final Table table; + private final Supplier
<Table> tableSupplier; private final Schema schema; private final RowType flinkSchema; private final PartitionSpec spec; - private final FileIO io; private final long targetFileSizeBytes; private final FileFormat format; private final List<Integer> equalityFieldIds; @@ -61,11 +62,37 @@ public RowDataTaskWriterFactory( Map<String, String> writeProperties, List<Integer> equalityFieldIds, boolean upsert) { - this.table = table; + this( + () -> table, + flinkSchema, + targetFileSizeBytes, + format, + writeProperties, + equalityFieldIds, + upsert); + } + + public RowDataTaskWriterFactory( + SerializableSupplier<Table>
tableSupplier, + RowType flinkSchema, + long targetFileSizeBytes, + FileFormat format, + Map writeProperties, + List equalityFieldIds, + boolean upsert) { + this.tableSupplier = tableSupplier; + + Table table; + if (tableSupplier instanceof CachingTableSupplier) { + // rely on the initial table metadata for schema, etc., until schema evolution is supported + table = ((CachingTableSupplier) tableSupplier).initialTable(); + } else { + table = tableSupplier.get(); + } + this.schema = table.schema(); this.flinkSchema = flinkSchema; this.spec = table.spec(); - this.io = table.io(); this.targetFileSizeBytes = targetFileSizeBytes; this.format = format; this.equalityFieldIds = equalityFieldIds; @@ -107,8 +134,21 @@ public RowDataTaskWriterFactory( @Override public void initialize(int taskId, int attemptId) { + Table table; + if (tableSupplier instanceof CachingTableSupplier) { + // rely on the initial table metadata for schema, etc., until schema evolution is supported + table = ((CachingTableSupplier) tableSupplier).initialTable(); + } else { + table = tableSupplier.get(); + } + + refreshTable(); + this.outputFileFactory = - OutputFileFactory.builderFor(table, taskId, attemptId).format(format).build(); + OutputFileFactory.builderFor(table, taskId, attemptId) + .format(format) + .ioSupplier(() -> tableSupplier.get().io()) + .build(); } @Override @@ -117,18 +157,25 @@ public TaskWriter create() { outputFileFactory, "The outputFileFactory shouldn't be null if we have invoked the initialize()."); + refreshTable(); + if (equalityFieldIds == null || equalityFieldIds.isEmpty()) { // Initialize a task writer to write INSERT only. if (spec.isUnpartitioned()) { return new UnpartitionedWriter<>( - spec, format, appenderFactory, outputFileFactory, io, targetFileSizeBytes); + spec, + format, + appenderFactory, + outputFileFactory, + tableSupplier.get().io(), + targetFileSizeBytes); } else { return new RowDataPartitionedFanoutWriter( spec, format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema); @@ -141,7 +188,7 @@ public TaskWriter create() { format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema, @@ -153,7 +200,7 @@ public TaskWriter create() { format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema, @@ -163,6 +210,12 @@ public TaskWriter create() { } } + void refreshTable() { + if (tableSupplier instanceof CachingTableSupplier) { + ((CachingTableSupplier) tableSupplier).refreshTable(); + } + } + private static class RowDataPartitionedFanoutWriter extends PartitionedFanoutWriter { private final PartitionKey partitionKey; diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java index d907a58ec2bc..7f47b70286f3 100644 --- a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.flink; +import static org.assertj.core.api.Assertions.assertThat; + import java.util.Arrays; import java.util.Collections; import java.util.Map; @@ -48,7 +50,6 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; @@ -121,12 +122,10 @@ public void testCreateTable() throws TableNotExistException { Assert.assertEquals( new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( TableSchema.builder().field("id", DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); } @Test @@ -176,7 +175,7 @@ public void testCreateTableIfNotExists() { sql("CREATE TABLE tl(id BIGINT)"); // Assert that table does exist. - Assert.assertEquals(Maps.newHashMap(), table("tl").properties()); + assertThat(table("tl")).isNotNull(); sql("DROP TABLE tl"); AssertHelpers.assertThrows( @@ -186,15 +185,13 @@ public void testCreateTableIfNotExists() { () -> table("tl")); sql("CREATE TABLE IF NOT EXISTS tl(id BIGINT)"); - Assert.assertEquals(Maps.newHashMap(), table("tl").properties()); + assertThat(table("tl").properties()).doesNotContainKey("key"); - final Map expectedProperties = ImmutableMap.of("key", "value"); table("tl").updateProperties().set("key", "value").commit(); - Assert.assertEquals(expectedProperties, table("tl").properties()); + assertThat(table("tl").properties()).containsEntry("key", "value"); sql("CREATE TABLE IF NOT EXISTS tl(id BIGINT)"); - Assert.assertEquals( - "Should still be the old table.", expectedProperties, table("tl").properties()); + assertThat(table("tl").properties()).containsEntry("key", "value"); } @Test @@ -206,12 +203,10 @@ public void testCreateTableLike() throws TableNotExistException { Assert.assertEquals( new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl2"); Assert.assertEquals( TableSchema.builder().field("id", DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); } @Test @@ -226,7 +221,6 @@ public void testCreateTableLocation() { new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); Assert.assertEquals("file:///tmp/location", table.location()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); } @Test @@ -242,7 +236,6 @@ public void testCreatePartitionTable() throws TableNotExistException { table.schema().asStruct()); Assert.assertEquals( PartitionSpec.builderFor(table.schema()).identity("dt").build(), table.spec()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( @@ -251,7 +244,6 @@ public void testCreatePartitionTable() throws TableNotExistException { .field("dt", DataTypes.STRING()) .build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); Assert.assertEquals(Collections.singletonList("dt"), catalogTable.getPartitionKeys()); } @@ -304,7 +296,6 @@ public void testLoadTransformPartitionTable() throws TableNotExistException { CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( TableSchema.builder().field("id", 
DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); Assert.assertEquals(Collections.emptyList(), catalogTable.getPartitionKeys()); } @@ -317,12 +308,12 @@ public void testAlterTable() throws TableNotExistException { // new sql("ALTER TABLE tl SET('newK'='newV')"); properties.put("newK", "newV"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // update old sql("ALTER TABLE tl SET('oldK'='oldV2')"); properties.put("oldK", "oldV2"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // remove property CatalogTable catalogTable = catalogTable("tl"); @@ -331,7 +322,7 @@ public void testAlterTable() throws TableNotExistException { .getCatalog(getTableEnv().getCurrentCatalog()) .get() .alterTable(new ObjectPath(DATABASE, "tl"), catalogTable.copy(properties), false); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); } @Test @@ -343,12 +334,12 @@ public void testAlterTableWithPrimaryKey() throws TableNotExistException { // new sql("ALTER TABLE tl SET('newK'='newV')"); properties.put("newK", "newV"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // update old sql("ALTER TABLE tl SET('oldK'='oldV2')"); properties.put("oldK", "oldV2"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // remove property CatalogTable catalogTable = catalogTable("tl"); @@ -357,7 +348,7 @@ public void testAlterTableWithPrimaryKey() throws TableNotExistException { .getCatalog(getTableEnv().getCurrentCatalog()) .get() .alterTable(new ObjectPath(DATABASE, "tl"), catalogTable.copy(properties), false); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); } @Test diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java new file mode 100644 index 000000000000..4b6ac25ab8e3 --- /dev/null +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.flink; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Duration; +import java.util.Map; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +public class TestFlinkConfParser { + + @Test + public void testDurationConf() { + Map writeOptions = ImmutableMap.of("write-prop", "111s"); + + ConfigOption configOption = + ConfigOptions.key("conf-prop").durationType().noDefaultValue(); + Configuration flinkConf = new Configuration(); + flinkConf.setString(configOption.key(), "222s"); + + Table table = mock(Table.class); + when(table.properties()).thenReturn(ImmutableMap.of("table-prop", "333s")); + + FlinkConfParser confParser = new FlinkConfParser(table, writeOptions, flinkConf); + Duration defaultVal = Duration.ofMillis(999); + + Duration result = + confParser.durationConf().option("write-prop").defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(111)); + + result = confParser.durationConf().flinkConfig(configOption).defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(222)); + + result = confParser.durationConf().tableProperty("table-prop").defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(333)); + } +} diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java index 93e97d5aa3bc..4ad302dde436 100644 --- a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java @@ -36,6 +36,11 @@ public TestTableLoader(String dir) { @Override public void open() {} + @Override + public boolean isOpen() { + return true; + } + @Override public Table loadTable() { return TestTables.load(dir, "test"); diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java new file mode 100644 index 000000000000..360db658cd2f --- /dev/null +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.flink.sink; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Duration; +import java.util.concurrent.TimeUnit; +import org.apache.iceberg.SerializableTable; +import org.apache.iceberg.Table; +import org.apache.iceberg.flink.TableLoader; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.Test; + +public class TestCachingTableSupplier { + + @Test + public void testCheckArguments() { + SerializableTable initialTable = mock(SerializableTable.class); + + Table loadedTable = mock(Table.class); + TableLoader tableLoader = mock(TableLoader.class); + when(tableLoader.loadTable()).thenReturn(loadedTable); + + new CachingTableSupplier(initialTable, tableLoader, Duration.ofMillis(100)); + + assertThatThrownBy(() -> new CachingTableSupplier(initialTable, tableLoader, null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("tableRefreshInterval cannot be null"); + assertThatThrownBy(() -> new CachingTableSupplier(null, tableLoader, Duration.ofMillis(100))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("initialTable cannot be null"); + assertThatThrownBy(() -> new CachingTableSupplier(initialTable, null, Duration.ofMillis(100))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("tableLoader cannot be null"); + } + + @Test + public void testTableReload() { + SerializableTable initialTable = mock(SerializableTable.class); + + Table loadedTable = mock(Table.class); + TableLoader tableLoader = mock(TableLoader.class); + when(tableLoader.loadTable()).thenReturn(loadedTable); + + CachingTableSupplier cachingTableSupplier = + new CachingTableSupplier(initialTable, tableLoader, Duration.ofMillis(100)); + + // refresh shouldn't do anything as the min reload interval hasn't passed + cachingTableSupplier.refreshTable(); + assertThat(cachingTableSupplier.get()).isEqualTo(initialTable); + + // refresh after waiting past the min reload interval + Awaitility.await() + .atLeast(100, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + cachingTableSupplier.refreshTable(); + assertThat(cachingTableSupplier.get()).isEqualTo(loadedTable); + }); + } +} diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java index 49f472b7325e..214c3de1063e 100644 --- a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java @@ -134,7 +134,7 @@ public void testCompressionParquet() throws Exception { if (initProperties.get(TableProperties.PARQUET_COMPRESSION) == null) { Assert.assertEquals( - TableProperties.PARQUET_COMPRESSION_DEFAULT, + TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0, resultProperties.get(TableProperties.PARQUET_COMPRESSION)); Assert.assertEquals( TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT, @@ -215,7 +215,7 @@ private static OneInputStreamOperatorTestHarness createIce icebergTable, override, new org.apache.flink.configuration.Configuration()); IcebergStreamWriter streamWriter = - FlinkSink.createStreamWriter(icebergTable, flinkWriteConfig, flinkRowType, null); + FlinkSink.createStreamWriter(() -> icebergTable, flinkWriteConfig, flinkRowType, null); 
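[Editor's sketch] The test call sites above now pass `() -> icebergTable` because `createStreamWriter` accepts a table supplier rather than a `Table`. Inside `FlinkSink#appendWriter` (changed later in this diff) the supplier is chosen roughly as condensed below; the helper method name is hypothetical and only summarizes the patch's logic:

```java
// Condensed from the FlinkSink#appendWriter change in this diff: use a CachingTableSupplier
// only when a refresh interval is configured, otherwise keep serving one serialized snapshot.
static SerializableSupplier<Table> tableSupplierFor(
    Table table, TableLoader tableLoader, FlinkWriteConf flinkWriteConf) {
  SerializableTable serializableTable = (SerializableTable) SerializableTable.copyOf(table);
  Duration tableRefreshInterval = flinkWriteConf.tableRefreshInterval();
  if (tableRefreshInterval != null) {
    return new CachingTableSupplier(serializableTable, tableLoader, tableRefreshInterval);
  }
  return () -> serializableTable;
}
```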
OneInputStreamOperatorTestHarness harness = new OneInputStreamOperatorTestHarness<>(streamWriter, 1, 1, 0); diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java index 23beb19a72f2..d771fe140b1a 100644 --- a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.TableSchema; @@ -390,4 +391,28 @@ public void testOverrideWriteConfigWithUnknownFileFormat() { return null; }); } + + @Test + public void testWriteRowWithTableRefreshInterval() throws Exception { + List rows = Lists.newArrayList(Row.of(1, "hello"), Row.of(2, "world"), Row.of(3, "foo")); + DataStream dataStream = + env.addSource(createBoundedSource(rows), ROW_TYPE_INFO) + .map(CONVERTER::toInternal, FlinkCompatibilityUtil.toTypeInfo(SimpleDataUtil.ROW_TYPE)); + + Configuration flinkConf = new Configuration(); + flinkConf.setString(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key(), "100ms"); + + FlinkSink.forRowData(dataStream) + .table(table) + .tableLoader(tableLoader) + .flinkConf(flinkConf) + .writeParallelism(parallelism) + .append(); + + // Execute the program. + env.execute("Test Iceberg DataStream"); + + // Assert the iceberg table's records. + SimpleDataUtil.assertTableRows(table, convertToRowData(rows)); + } } diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java index f171485a90f7..ce1f208a4b07 100644 --- a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java @@ -34,7 +34,6 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileFormat; -import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestFiles; import org.apache.iceberg.Table; @@ -94,7 +93,8 @@ public void testIO() throws IOException { String operatorId = newOperatorUniqueId(); for (long checkpointId = 1; checkpointId <= 3; checkpointId++) { ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); final long curCkpId = checkpointId; List dataFiles = generateDataFiles(10); @@ -135,14 +135,7 @@ public void testUserProvidedManifestLocation() throws IOException { Map props = ImmutableMap.of(FLINK_MANIFEST_LOCATION, userProvidedFolder.getAbsolutePath() + "///"); ManifestOutputFileFactory factory = - new ManifestOutputFileFactory( - ((HasTableOperations) table).operations(), - table.io(), - props, - flinkJobId, - operatorId, - 1, - 1); + new ManifestOutputFileFactory(() -> table, props, flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(5); DeltaManifests deltaManifests = @@ -177,7 +170,8 @@ public void testVersionedSerializer() throws IOException { String 
flinkJobId = newFlinkJobId(); String operatorId = newOperatorUniqueId(); ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(10); List eqDeleteFiles = generateEqDeleteFiles(10); @@ -214,7 +208,8 @@ public void testCompatibility() throws IOException { String flinkJobId = newFlinkJobId(); String operatorId = newOperatorUniqueId(); ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(10); ManifestFile manifest = diff --git a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java index fa69c5d4d1fd..0968f89f55e0 100644 --- a/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java +++ b/flink/v1.16/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java @@ -376,7 +376,7 @@ private OneInputStreamOperatorTestHarness createIcebergStr icebergTable, Maps.newHashMap(), new org.apache.flink.configuration.Configuration()); IcebergStreamWriter streamWriter = - FlinkSink.createStreamWriter(icebergTable, flinkWriteConfig, flinkRowType, null); + FlinkSink.createStreamWriter(() -> icebergTable, flinkWriteConfig, flinkRowType, null); OneInputStreamOperatorTestHarness harness = new OneInputStreamOperatorTestHarness<>(streamWriter, 1, 1, 0); diff --git a/flink/v1.17/build.gradle b/flink/v1.17/build.gradle index fdd766449a56..2e081b760878 100644 --- a/flink/v1.17/build.gradle +++ b/flink/v1.17/build.gradle @@ -114,6 +114,11 @@ project(":iceberg-flink:iceberg-flink-${flinkMajorVersion}") { } testImplementation libs.awaitility + testImplementation libs.assertj.core + } + + test { + useJUnitPlatform() } } diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java index 65717089d0d8..7167859e600c 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkConfParser.java @@ -18,11 +18,13 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.function.Function; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ReadableConfig; +import org.apache.flink.util.TimeUtils; import org.apache.iceberg.Table; import org.apache.iceberg.relocated.com.google.common.base.Preconditions; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -59,6 +61,10 @@ public StringConfParser stringConf() { return new StringConfParser(); } + public DurationConfParser durationConf() { + return new DurationConfParser(); + } + class BooleanConfParser extends ConfParser { private Boolean defaultValue; @@ -180,6 +186,29 @@ public E parseOptional() { } } + class DurationConfParser extends ConfParser { + private Duration defaultValue; + + @Override + protected DurationConfParser self() { + return this; + } + + public DurationConfParser defaultValue(Duration value) { + this.defaultValue = value; + return self(); + } + 
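// Usage sketch (illustrative, mirroring TestFlinkConfParser later in this diff): a duration
// setting is resolved from write options, the Flink ReadableConfig, or table properties, with
// an explicit default, for example:
//
//   Duration refresh =
//       confParser.durationConf()
//           .option("write-prop")                  // key taken from the test, not a real option
//           .defaultValue(Duration.ofMillis(999))
//           .parse();
//
// Values are parsed with Flink's TimeUtils, so strings such as "111s" or "100ms" are accepted.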
+ public Duration parse() { + Preconditions.checkArgument(defaultValue != null, "Default value cannot be null"); + return parse(TimeUtils::parseDuration, defaultValue); + } + + public Duration parseOptional() { + return parse(TimeUtils::parseDuration, null); + } + } + abstract class ConfParser { private final List optionNames = Lists.newArrayList(); private String tablePropertyName; diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java index aba23389f2fe..ca7b1120bc81 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteConf.java @@ -18,7 +18,9 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; import java.util.Map; +import org.apache.flink.annotation.Experimental; import org.apache.flink.configuration.ReadableConfig; import org.apache.iceberg.DistributionMode; import org.apache.iceberg.FileFormat; @@ -184,4 +186,20 @@ public String branch() { public Integer writeParallelism() { return confParser.intConf().option(FlinkWriteOptions.WRITE_PARALLELISM.key()).parseOptional(); } + + /** + * NOTE: This may be removed or changed in a future release. This value specifies the interval for + * refreshing the table instances in sink writer subtasks. If not specified then the default + * behavior is to not refresh the table. + * + * @return the interval for refreshing the table in sink writer subtasks + */ + @Experimental + public Duration tableRefreshInterval() { + return confParser + .durationConf() + .option(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key()) + .flinkConfig(FlinkWriteOptions.TABLE_REFRESH_INTERVAL) + .parseOptional(); + } } diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java index ba0931318e0d..df73f2e09cac 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/FlinkWriteOptions.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.flink; +import java.time.Duration; +import org.apache.flink.annotation.Experimental; import org.apache.flink.configuration.ConfigOption; import org.apache.flink.configuration.ConfigOptions; import org.apache.iceberg.SnapshotRef; @@ -64,4 +66,8 @@ private FlinkWriteOptions() {} public static final ConfigOption WRITE_PARALLELISM = ConfigOptions.key("write-parallelism").intType().noDefaultValue(); + + @Experimental + public static final ConfigOption TABLE_REFRESH_INTERVAL = + ConfigOptions.key("table-refresh-interval").durationType().noDefaultValue(); } diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java index f18c5ccda1f6..da509451fee7 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/TableLoader.java @@ -38,6 +38,8 @@ public interface TableLoader extends Closeable, Serializable, Cloneable { void open(); + boolean isOpen(); + Table loadTable(); /** Clone a TableLoader */ @@ -75,6 +77,11 @@ public void open() { tables = new HadoopTables(hadoopConf.get()); } + @Override + public boolean isOpen() { + return tables != null; + } + @Override public Table loadTable() { FlinkEnvironmentContext.init(); @@ 
-115,6 +122,11 @@ public void open() { catalog = catalogLoader.loadCatalog(); } + @Override + public boolean isOpen() { + return catalog != null; + } + @Override public Table loadTable() { FlinkEnvironmentContext.init(); @@ -126,6 +138,8 @@ public void close() throws IOException { if (catalog instanceof Closeable) { ((Closeable) catalog).close(); } + + catalog = null; } @Override diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java new file mode 100644 index 000000000000..e9f9786f9190 --- /dev/null +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/CachingTableSupplier.java @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg.flink.sink; + +import java.time.Duration; +import org.apache.flink.util.Preconditions; +import org.apache.iceberg.SerializableTable; +import org.apache.iceberg.Table; +import org.apache.iceberg.flink.TableLoader; +import org.apache.iceberg.util.DateTimeUtil; +import org.apache.iceberg.util.SerializableSupplier; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * A table loader that will only reload a table after a certain interval has passed. WARNING: This + * table loader should be used carefully when used with writer tasks. It could result in heavy load + * on a catalog for jobs with many writers. + */ +class CachingTableSupplier implements SerializableSupplier
{ + + private static final Logger LOG = LoggerFactory.getLogger(CachingTableSupplier.class); + + private final Table initialTable; + private final TableLoader tableLoader; + private final Duration tableRefreshInterval; + private long lastLoadTimeMillis; + private transient Table table; + + CachingTableSupplier( + SerializableTable initialTable, TableLoader tableLoader, Duration tableRefreshInterval) { + Preconditions.checkArgument(initialTable != null, "initialTable cannot be null"); + Preconditions.checkArgument(tableLoader != null, "tableLoader cannot be null"); + Preconditions.checkArgument( + tableRefreshInterval != null, "tableRefreshInterval cannot be null"); + this.initialTable = initialTable; + this.table = initialTable; + this.tableLoader = tableLoader; + this.tableRefreshInterval = tableRefreshInterval; + this.lastLoadTimeMillis = System.currentTimeMillis(); + } + + @Override + public Table get() { + if (table == null) { + this.table = initialTable; + } + return table; + } + + Table initialTable() { + return initialTable; + } + + void refreshTable() { + if (System.currentTimeMillis() > lastLoadTimeMillis + tableRefreshInterval.toMillis()) { + try { + if (!tableLoader.isOpen()) { + tableLoader.open(); + } + + this.table = tableLoader.loadTable(); + this.lastLoadTimeMillis = System.currentTimeMillis(); + + LOG.info( + "Table {} reloaded, next min load time threshold is {}", + table.name(), + DateTimeUtil.formatTimestampMillis( + lastLoadTimeMillis + tableRefreshInterval.toMillis())); + } catch (Exception e) { + LOG.warn("An error occurred reloading table {}, table was not reloaded", table.name(), e); + } + } + } +} diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java index 00d55f937cc4..c7e8a2dea7cb 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkManifestUtil.java @@ -24,13 +24,11 @@ import java.util.function.Supplier; import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; -import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestFiles; import org.apache.iceberg.ManifestWriter; import org.apache.iceberg.PartitionSpec; import org.apache.iceberg.Table; -import org.apache.iceberg.TableOperations; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; @@ -64,16 +62,14 @@ static List readDataFiles( } static ManifestOutputFileFactory createOutputFileFactory( - Table table, String flinkJobId, String operatorUniqueId, int subTaskId, long attemptNumber) { - TableOperations ops = ((HasTableOperations) table).operations(); + Supplier
tableSupplier, + Map tableProps, + String flinkJobId, + String operatorUniqueId, + int subTaskId, + long attemptNumber) { return new ManifestOutputFileFactory( - ops, - table.io(), - table.properties(), - flinkJobId, - operatorUniqueId, - subTaskId, - attemptNumber); + tableSupplier, tableProps, flinkJobId, operatorUniqueId, subTaskId, attemptNumber); } /** diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java index 023702790116..58828799255d 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/FlinkSink.java @@ -28,6 +28,7 @@ import java.io.IOException; import java.io.UncheckedIOException; +import java.time.Duration; import java.util.List; import java.util.Map; import java.util.Set; @@ -67,6 +68,7 @@ import org.apache.iceberg.relocated.com.google.common.collect.Maps; import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; +import org.apache.iceberg.util.SerializableSupplier; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -330,7 +332,10 @@ private DataStreamSink chainIcebergOperators() { DataStream rowDataInput = inputCreator.apply(uidPrefix); if (table == null) { - tableLoader.open(); + if (!tableLoader.isOpen()) { + tableLoader.open(); + } + try (TableLoader loader = tableLoader) { this.table = loader.loadTable(); } catch (IOException e) { @@ -462,8 +467,19 @@ private SingleOutputStreamOperator appendWriter( } } + SerializableTable serializableTable = (SerializableTable) SerializableTable.copyOf(table); + Duration tableRefreshInterval = flinkWriteConf.tableRefreshInterval(); + + SerializableSupplier
tableSupplier; + if (tableRefreshInterval != null) { + tableSupplier = + new CachingTableSupplier(serializableTable, tableLoader, tableRefreshInterval); + } else { + tableSupplier = () -> serializableTable; + } + IcebergStreamWriter streamWriter = - createStreamWriter(table, flinkWriteConf, flinkRowType, equalityFieldIds); + createStreamWriter(tableSupplier, flinkWriteConf, flinkRowType, equalityFieldIds); int parallelism = flinkWriteConf.writeParallelism() == null @@ -580,24 +596,25 @@ static RowType toFlinkRowType(Schema schema, TableSchema requestedSchema) { } static IcebergStreamWriter createStreamWriter( - Table table, + SerializableSupplier
tableSupplier, FlinkWriteConf flinkWriteConf, RowType flinkRowType, List equalityFieldIds) { - Preconditions.checkArgument(table != null, "Iceberg table shouldn't be null"); + Preconditions.checkArgument(tableSupplier != null, "Iceberg table supplier shouldn't be null"); - Table serializableTable = SerializableTable.copyOf(table); + Table initTable = tableSupplier.get(); FileFormat format = flinkWriteConf.dataFileFormat(); TaskWriterFactory taskWriterFactory = new RowDataTaskWriterFactory( - serializableTable, + tableSupplier, flinkRowType, flinkWriteConf.targetDataFileSize(), format, - writeProperties(table, format, flinkWriteConf), + writeProperties(initTable, format, flinkWriteConf), equalityFieldIds, flinkWriteConf.upsertMode()); - return new IcebergStreamWriter<>(table.name(), taskWriterFactory); + + return new IcebergStreamWriter<>(initTable.name(), taskWriterFactory); } /** diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java index 3805ab298428..b9bceaa9311d 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/IcebergFilesCommitter.java @@ -160,7 +160,7 @@ public void initializeState(StateInitializationContext context) throws Exception int attemptId = getRuntimeContext().getAttemptNumber(); this.manifestOutputFileFactory = FlinkManifestUtil.createOutputFileFactory( - table, flinkJobId, operatorUniqueId, subTaskId, attemptId); + () -> table, table.properties(), flinkJobId, operatorUniqueId, subTaskId, attemptId); this.maxCommittedCheckpointId = INITIAL_CHECKPOINT_ID; this.checkpointsState = context.getOperatorStateStore().getListState(STATE_DESCRIPTOR); @@ -247,6 +247,9 @@ public void notifyCheckpointComplete(long checkpointId) throws Exception { checkpointId, maxCommittedCheckpointId); } + + // reload the table in case new configuration is needed + this.table = tableLoader.loadTable(); } private void commitUpToCheckpoint( diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java index 045e45a4ceae..da5e6e7627ae 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/ManifestOutputFileFactory.java @@ -20,9 +20,11 @@ import java.util.Map; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Supplier; import org.apache.iceberg.FileFormat; +import org.apache.iceberg.HasTableOperations; +import org.apache.iceberg.Table; import org.apache.iceberg.TableOperations; -import org.apache.iceberg.io.FileIO; import org.apache.iceberg.io.OutputFile; import org.apache.iceberg.relocated.com.google.common.base.Strings; @@ -31,8 +33,7 @@ class ManifestOutputFileFactory { // properties. static final String FLINK_MANIFEST_LOCATION = "flink.manifests.location"; - private final TableOperations ops; - private final FileIO io; + private final Supplier
tableSupplier; private final Map props; private final String flinkJobId; private final String operatorUniqueId; @@ -41,15 +42,13 @@ class ManifestOutputFileFactory { private final AtomicInteger fileCount = new AtomicInteger(0); ManifestOutputFileFactory( - TableOperations ops, - FileIO io, + Supplier
tableSupplier, Map props, String flinkJobId, String operatorUniqueId, int subTaskId, long attemptNumber) { - this.ops = ops; - this.io = io; + this.tableSupplier = tableSupplier; this.props = props; this.flinkJobId = flinkJobId; this.operatorUniqueId = operatorUniqueId; @@ -71,6 +70,7 @@ private String generatePath(long checkpointId) { OutputFile create(long checkpointId) { String flinkManifestDir = props.get(FLINK_MANIFEST_LOCATION); + TableOperations ops = ((HasTableOperations) tableSupplier.get()).operations(); String newManifestFullPath; if (Strings.isNullOrEmpty(flinkManifestDir)) { @@ -81,7 +81,7 @@ OutputFile create(long checkpointId) { String.format("%s/%s", stripTrailingSlash(flinkManifestDir), generatePath(checkpointId)); } - return io.newOutputFile(newManifestFullPath); + return tableSupplier.get().io().newOutputFile(newManifestFullPath); } private static String stripTrailingSlash(String path) { diff --git a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java index c624eb3f0276..67422a1afeb1 100644 --- a/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java +++ b/flink/v1.17/flink/src/main/java/org/apache/iceberg/flink/sink/RowDataTaskWriterFactory.java @@ -20,6 +20,7 @@ import java.util.List; import java.util.Map; +import java.util.function.Supplier; import org.apache.flink.table.data.RowData; import org.apache.flink.table.types.logical.RowType; import org.apache.iceberg.FileFormat; @@ -38,13 +39,13 @@ import org.apache.iceberg.relocated.com.google.common.collect.Sets; import org.apache.iceberg.types.TypeUtil; import org.apache.iceberg.util.ArrayUtil; +import org.apache.iceberg.util.SerializableSupplier; public class RowDataTaskWriterFactory implements TaskWriterFactory { - private final Table table; + private final Supplier
tableSupplier; private final Schema schema; private final RowType flinkSchema; private final PartitionSpec spec; - private final FileIO io; private final long targetFileSizeBytes; private final FileFormat format; private final List equalityFieldIds; @@ -61,11 +62,37 @@ public RowDataTaskWriterFactory( Map writeProperties, List equalityFieldIds, boolean upsert) { - this.table = table; + this( + () -> table, + flinkSchema, + targetFileSizeBytes, + format, + writeProperties, + equalityFieldIds, + upsert); + } + + public RowDataTaskWriterFactory( + SerializableSupplier
tableSupplier, + RowType flinkSchema, + long targetFileSizeBytes, + FileFormat format, + Map writeProperties, + List equalityFieldIds, + boolean upsert) { + this.tableSupplier = tableSupplier; + + Table table; + if (tableSupplier instanceof CachingTableSupplier) { + // rely on the initial table metadata for schema, etc., until schema evolution is supported + table = ((CachingTableSupplier) tableSupplier).initialTable(); + } else { + table = tableSupplier.get(); + } + this.schema = table.schema(); this.flinkSchema = flinkSchema; this.spec = table.spec(); - this.io = table.io(); this.targetFileSizeBytes = targetFileSizeBytes; this.format = format; this.equalityFieldIds = equalityFieldIds; @@ -107,8 +134,21 @@ public RowDataTaskWriterFactory( @Override public void initialize(int taskId, int attemptId) { + Table table; + if (tableSupplier instanceof CachingTableSupplier) { + // rely on the initial table metadata for schema, etc., until schema evolution is supported + table = ((CachingTableSupplier) tableSupplier).initialTable(); + } else { + table = tableSupplier.get(); + } + + refreshTable(); + this.outputFileFactory = - OutputFileFactory.builderFor(table, taskId, attemptId).format(format).build(); + OutputFileFactory.builderFor(table, taskId, attemptId) + .format(format) + .ioSupplier(() -> tableSupplier.get().io()) + .build(); } @Override @@ -117,18 +157,25 @@ public TaskWriter create() { outputFileFactory, "The outputFileFactory shouldn't be null if we have invoked the initialize()."); + refreshTable(); + if (equalityFieldIds == null || equalityFieldIds.isEmpty()) { // Initialize a task writer to write INSERT only. if (spec.isUnpartitioned()) { return new UnpartitionedWriter<>( - spec, format, appenderFactory, outputFileFactory, io, targetFileSizeBytes); + spec, + format, + appenderFactory, + outputFileFactory, + tableSupplier.get().io(), + targetFileSizeBytes); } else { return new RowDataPartitionedFanoutWriter( spec, format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema); @@ -141,7 +188,7 @@ public TaskWriter create() { format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema, @@ -153,7 +200,7 @@ public TaskWriter create() { format, appenderFactory, outputFileFactory, - io, + tableSupplier.get().io(), targetFileSizeBytes, schema, flinkSchema, @@ -163,6 +210,12 @@ public TaskWriter create() { } } + void refreshTable() { + if (tableSupplier instanceof CachingTableSupplier) { + ((CachingTableSupplier) tableSupplier).refreshTable(); + } + } + private static class RowDataPartitionedFanoutWriter extends PartitionedFanoutWriter { private final PartitionKey partitionKey; diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java index db83cea1e536..16dcf4a9f4ce 100644 --- a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestFlinkCatalogTable.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.flink; +import static org.assertj.core.api.Assertions.assertThat; + import java.util.Arrays; import java.util.Collections; import java.util.Map; @@ -47,7 +49,6 @@ import org.apache.iceberg.catalog.TableIdentifier; import org.apache.iceberg.exceptions.NoSuchTableException; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList; -import 
org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Maps; @@ -121,12 +122,10 @@ public void testCreateTable() throws TableNotExistException { Assert.assertEquals( new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( TableSchema.builder().field("id", DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); } @Test @@ -176,7 +175,7 @@ public void testCreateTableIfNotExists() { sql("CREATE TABLE tl(id BIGINT)"); // Assert that table does exist. - Assert.assertEquals(Maps.newHashMap(), table("tl").properties()); + assertThat(table("tl")).isNotNull(); sql("DROP TABLE tl"); Assertions.assertThatThrownBy(() -> table("tl")) @@ -184,15 +183,13 @@ public void testCreateTableIfNotExists() { .hasMessage("Table does not exist: " + getFullQualifiedTableName("tl")); sql("CREATE TABLE IF NOT EXISTS tl(id BIGINT)"); - Assert.assertEquals(Maps.newHashMap(), table("tl").properties()); + assertThat(table("tl").properties()).doesNotContainKey("key"); - final Map expectedProperties = ImmutableMap.of("key", "value"); table("tl").updateProperties().set("key", "value").commit(); - Assert.assertEquals(expectedProperties, table("tl").properties()); + assertThat(table("tl").properties()).containsEntry("key", "value"); sql("CREATE TABLE IF NOT EXISTS tl(id BIGINT)"); - Assert.assertEquals( - "Should still be the old table.", expectedProperties, table("tl").properties()); + assertThat(table("tl").properties()).containsEntry("key", "value"); } @Test @@ -204,12 +201,10 @@ public void testCreateTableLike() throws TableNotExistException { Assert.assertEquals( new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl2"); Assert.assertEquals( TableSchema.builder().field("id", DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); } @Test @@ -224,7 +219,6 @@ public void testCreateTableLocation() { new Schema(Types.NestedField.optional(1, "id", Types.LongType.get())).asStruct(), table.schema().asStruct()); Assert.assertEquals("file:///tmp/location", table.location()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); } @Test @@ -240,7 +234,6 @@ public void testCreatePartitionTable() throws TableNotExistException { table.schema().asStruct()); Assert.assertEquals( PartitionSpec.builderFor(table.schema()).identity("dt").build(), table.spec()); - Assert.assertEquals(Maps.newHashMap(), table.properties()); CatalogTable catalogTable = catalogTable("tl"); Assert.assertEquals( @@ -249,7 +242,6 @@ public void testCreatePartitionTable() throws TableNotExistException { .field("dt", DataTypes.STRING()) .build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); Assert.assertEquals(Collections.singletonList("dt"), catalogTable.getPartitionKeys()); } @@ -301,7 +293,6 @@ public void testLoadTransformPartitionTable() throws TableNotExistException { CatalogTable catalogTable = 
catalogTable("tl"); Assert.assertEquals( TableSchema.builder().field("id", DataTypes.BIGINT()).build(), catalogTable.getSchema()); - Assert.assertEquals(Maps.newHashMap(), catalogTable.getOptions()); Assert.assertEquals(Collections.emptyList(), catalogTable.getPartitionKeys()); } @@ -314,12 +305,12 @@ public void testAlterTable() throws TableNotExistException { // new sql("ALTER TABLE tl SET('newK'='newV')"); properties.put("newK", "newV"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // update old sql("ALTER TABLE tl SET('oldK'='oldV2')"); properties.put("oldK", "oldV2"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // remove property CatalogTable catalogTable = catalogTable("tl"); @@ -328,7 +319,7 @@ public void testAlterTable() throws TableNotExistException { .getCatalog(getTableEnv().getCurrentCatalog()) .get() .alterTable(new ObjectPath(DATABASE, "tl"), catalogTable.copy(properties), false); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); } @Test @@ -340,12 +331,12 @@ public void testAlterTableWithPrimaryKey() throws TableNotExistException { // new sql("ALTER TABLE tl SET('newK'='newV')"); properties.put("newK", "newV"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // update old sql("ALTER TABLE tl SET('oldK'='oldV2')"); properties.put("oldK", "oldV2"); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); // remove property CatalogTable catalogTable = catalogTable("tl"); @@ -354,7 +345,7 @@ public void testAlterTableWithPrimaryKey() throws TableNotExistException { .getCatalog(getTableEnv().getCurrentCatalog()) .get() .alterTable(new ObjectPath(DATABASE, "tl"), catalogTable.copy(properties), false); - Assert.assertEquals(properties, table("tl").properties()); + assertThat(table("tl").properties()).containsAllEntriesOf(properties); } @Test diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java new file mode 100644 index 000000000000..4b6ac25ab8e3 --- /dev/null +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestFlinkConfParser.java @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.flink; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Duration; +import java.util.Map; +import org.apache.flink.configuration.ConfigOption; +import org.apache.flink.configuration.ConfigOptions; +import org.apache.flink.configuration.Configuration; +import org.apache.iceberg.Table; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.junit.jupiter.api.Test; + +public class TestFlinkConfParser { + + @Test + public void testDurationConf() { + Map writeOptions = ImmutableMap.of("write-prop", "111s"); + + ConfigOption configOption = + ConfigOptions.key("conf-prop").durationType().noDefaultValue(); + Configuration flinkConf = new Configuration(); + flinkConf.setString(configOption.key(), "222s"); + + Table table = mock(Table.class); + when(table.properties()).thenReturn(ImmutableMap.of("table-prop", "333s")); + + FlinkConfParser confParser = new FlinkConfParser(table, writeOptions, flinkConf); + Duration defaultVal = Duration.ofMillis(999); + + Duration result = + confParser.durationConf().option("write-prop").defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(111)); + + result = confParser.durationConf().flinkConfig(configOption).defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(222)); + + result = confParser.durationConf().tableProperty("table-prop").defaultValue(defaultVal).parse(); + assertThat(result).isEqualTo(Duration.ofSeconds(333)); + } +} diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java index 93e97d5aa3bc..4ad302dde436 100644 --- a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/TestTableLoader.java @@ -36,6 +36,11 @@ public TestTableLoader(String dir) { @Override public void open() {} + @Override + public boolean isOpen() { + return true; + } + @Override public Table loadTable() { return TestTables.load(dir, "test"); diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java new file mode 100644 index 000000000000..360db658cd2f --- /dev/null +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestCachingTableSupplier.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.iceberg.flink.sink; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +import java.time.Duration; +import java.util.concurrent.TimeUnit; +import org.apache.iceberg.SerializableTable; +import org.apache.iceberg.Table; +import org.apache.iceberg.flink.TableLoader; +import org.awaitility.Awaitility; +import org.junit.jupiter.api.Test; + +public class TestCachingTableSupplier { + + @Test + public void testCheckArguments() { + SerializableTable initialTable = mock(SerializableTable.class); + + Table loadedTable = mock(Table.class); + TableLoader tableLoader = mock(TableLoader.class); + when(tableLoader.loadTable()).thenReturn(loadedTable); + + new CachingTableSupplier(initialTable, tableLoader, Duration.ofMillis(100)); + + assertThatThrownBy(() -> new CachingTableSupplier(initialTable, tableLoader, null)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("tableRefreshInterval cannot be null"); + assertThatThrownBy(() -> new CachingTableSupplier(null, tableLoader, Duration.ofMillis(100))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("initialTable cannot be null"); + assertThatThrownBy(() -> new CachingTableSupplier(initialTable, null, Duration.ofMillis(100))) + .isInstanceOf(IllegalArgumentException.class) + .hasMessage("tableLoader cannot be null"); + } + + @Test + public void testTableReload() { + SerializableTable initialTable = mock(SerializableTable.class); + + Table loadedTable = mock(Table.class); + TableLoader tableLoader = mock(TableLoader.class); + when(tableLoader.loadTable()).thenReturn(loadedTable); + + CachingTableSupplier cachingTableSupplier = + new CachingTableSupplier(initialTable, tableLoader, Duration.ofMillis(100)); + + // refresh shouldn't do anything as the min reload interval hasn't passed + cachingTableSupplier.refreshTable(); + assertThat(cachingTableSupplier.get()).isEqualTo(initialTable); + + // refresh after waiting past the min reload interval + Awaitility.await() + .atLeast(100, TimeUnit.MILLISECONDS) + .untilAsserted( + () -> { + cachingTableSupplier.refreshTable(); + assertThat(cachingTableSupplier.get()).isEqualTo(loadedTable); + }); + } +} diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java index 49f472b7325e..14f12422da96 100644 --- a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestCompressionSettings.java @@ -134,11 +134,11 @@ public void testCompressionParquet() throws Exception { if (initProperties.get(TableProperties.PARQUET_COMPRESSION) == null) { Assert.assertEquals( - TableProperties.PARQUET_COMPRESSION_DEFAULT, + TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0, resultProperties.get(TableProperties.PARQUET_COMPRESSION)); Assert.assertEquals( TableProperties.PARQUET_COMPRESSION_LEVEL_DEFAULT, - resultProperties.get(TableProperties.PARQUET_COMPRESSION_LEVEL)); + resultProperties.get(TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0)); } else { Assert.assertEquals( initProperties.get(TableProperties.PARQUET_COMPRESSION), @@ -215,7 +215,7 @@ private static OneInputStreamOperatorTestHarness createIce icebergTable, override, new org.apache.flink.configuration.Configuration()); 
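[Editor's sketch] Beyond the supplier tests above, the writer-side factories in this diff stop caching `FileIO` and `TableOperations` and instead resolve them through the table supplier on each use, so a table reloaded by `CachingTableSupplier` (for example with refreshed IO credentials) is picked up by subsequent writes. A condensed view of that pattern, with field names as in the surrounding diff and not intended as standalone code:

```java
// RowDataTaskWriterFactory#initialize: hand the output file factory an IO supplier instead of
// a captured FileIO, so each newly created file uses the currently loaded table's IO.
this.outputFileFactory =
    OutputFileFactory.builderFor(table, taskId, attemptId)
        .format(format)
        .ioSupplier(() -> tableSupplier.get().io())
        .build();

// ManifestOutputFileFactory#create: resolve operations and IO lazily through the supplier.
TableOperations ops = ((HasTableOperations) tableSupplier.get()).operations();
return tableSupplier.get().io().newOutputFile(newManifestFullPath);
```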
IcebergStreamWriter streamWriter = - FlinkSink.createStreamWriter(icebergTable, flinkWriteConfig, flinkRowType, null); + FlinkSink.createStreamWriter(() -> icebergTable, flinkWriteConfig, flinkRowType, null); OneInputStreamOperatorTestHarness harness = new OneInputStreamOperatorTestHarness<>(streamWriter, 1, 1, 0); diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java index dc9e8991edb1..11a73d2cc144 100644 --- a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkIcebergSink.java @@ -22,6 +22,7 @@ import java.util.Collections; import java.util.List; import java.util.Map; +import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.TableSchema; @@ -369,4 +370,28 @@ public void testOverrideWriteConfigWithUnknownFileFormat() { .isInstanceOf(IllegalArgumentException.class) .hasMessage("Invalid file format: UNRECOGNIZED"); } + + @Test + public void testWriteRowWithTableRefreshInterval() throws Exception { + List rows = Lists.newArrayList(Row.of(1, "hello"), Row.of(2, "world"), Row.of(3, "foo")); + DataStream dataStream = + env.addSource(createBoundedSource(rows), ROW_TYPE_INFO) + .map(CONVERTER::toInternal, FlinkCompatibilityUtil.toTypeInfo(SimpleDataUtil.ROW_TYPE)); + + Configuration flinkConf = new Configuration(); + flinkConf.setString(FlinkWriteOptions.TABLE_REFRESH_INTERVAL.key(), "100ms"); + + FlinkSink.forRowData(dataStream) + .table(table) + .tableLoader(tableLoader) + .flinkConf(flinkConf) + .writeParallelism(parallelism) + .append(); + + // Execute the program. + env.execute("Test Iceberg DataStream"); + + // Assert the iceberg table's records. 
+ SimpleDataUtil.assertTableRows(table, convertToRowData(rows)); + } } diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java index f171485a90f7..ce1f208a4b07 100644 --- a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestFlinkManifest.java @@ -34,7 +34,6 @@ import org.apache.iceberg.DataFile; import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileFormat; -import org.apache.iceberg.HasTableOperations; import org.apache.iceberg.ManifestFile; import org.apache.iceberg.ManifestFiles; import org.apache.iceberg.Table; @@ -94,7 +93,8 @@ public void testIO() throws IOException { String operatorId = newOperatorUniqueId(); for (long checkpointId = 1; checkpointId <= 3; checkpointId++) { ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); final long curCkpId = checkpointId; List dataFiles = generateDataFiles(10); @@ -135,14 +135,7 @@ public void testUserProvidedManifestLocation() throws IOException { Map props = ImmutableMap.of(FLINK_MANIFEST_LOCATION, userProvidedFolder.getAbsolutePath() + "///"); ManifestOutputFileFactory factory = - new ManifestOutputFileFactory( - ((HasTableOperations) table).operations(), - table.io(), - props, - flinkJobId, - operatorId, - 1, - 1); + new ManifestOutputFileFactory(() -> table, props, flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(5); DeltaManifests deltaManifests = @@ -177,7 +170,8 @@ public void testVersionedSerializer() throws IOException { String flinkJobId = newFlinkJobId(); String operatorId = newOperatorUniqueId(); ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(10); List eqDeleteFiles = generateEqDeleteFiles(10); @@ -214,7 +208,8 @@ public void testCompatibility() throws IOException { String flinkJobId = newFlinkJobId(); String operatorId = newOperatorUniqueId(); ManifestOutputFileFactory factory = - FlinkManifestUtil.createOutputFileFactory(table, flinkJobId, operatorId, 1, 1); + FlinkManifestUtil.createOutputFileFactory( + () -> table, table.properties(), flinkJobId, operatorId, 1, 1); List dataFiles = generateDataFiles(10); ManifestFile manifest = diff --git a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java index fa69c5d4d1fd..0968f89f55e0 100644 --- a/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java +++ b/flink/v1.17/flink/src/test/java/org/apache/iceberg/flink/sink/TestIcebergStreamWriter.java @@ -376,7 +376,7 @@ private OneInputStreamOperatorTestHarness createIcebergStr icebergTable, Maps.newHashMap(), new org.apache.flink.configuration.Configuration()); IcebergStreamWriter streamWriter = - FlinkSink.createStreamWriter(icebergTable, flinkWriteConfig, flinkRowType, null); + FlinkSink.createStreamWriter(() -> icebergTable, flinkWriteConfig, flinkRowType, null); OneInputStreamOperatorTestHarness harness = new 
OneInputStreamOperatorTestHarness<>(streamWriter, 1, 1, 0); diff --git a/format/spec.md b/format/spec.md index 60c0f99c3f90..01903393f88f 100644 --- a/format/spec.md +++ b/format/spec.md @@ -541,7 +541,7 @@ Manifest list files store `manifest_file`, a struct with the following fields: | | _required_ | **`517 content`** | `int` with meaning: `0: data`, `1: deletes` | The type of files tracked by the manifest, either data or delete files; 0 for all v1 manifests | | | _required_ | **`515 sequence_number`** | `long` | The sequence number when the manifest was added to the table; use 0 when reading v1 manifest lists | | | _required_ | **`516 min_sequence_number`** | `long` | The minimum data sequence number of all live data or delete files in the manifest; use 0 when reading v1 manifest lists | -| _required_ | _required_ | **`503 added_snapshot_id`** | `long` | ID of the snapshot where the manifest file was added | +| _optional_ | _required_ | **`503 added_snapshot_id`** | `long` | ID of the snapshot where the manifest file was added | | _optional_ | _required_ | **`504 added_files_count`** | `int` | Number of entries in the manifest that have status `ADDED` (1), when `null` this is assumed to be non-zero | | _optional_ | _required_ | **`505 existing_files_count`** | `int` | Number of entries in the manifest that have status `EXISTING` (0), when `null` this is assumed to be non-zero | | _optional_ | _required_ | **`506 deleted_files_count`** | `int` | Number of entries in the manifest that have status `DELETED` (2), when `null` this is assumed to be non-zero | diff --git a/format/view-spec.md b/format/view-spec.md index d7e0b7b7a60d..26313193afad 100644 --- a/format/view-spec.md +++ b/format/view-spec.md @@ -58,6 +58,7 @@ The view version metadata file has the following fields: | Requirement | Field name | Description | |-------------|----------------------|-------------| +| _required_ | `view-uuid` | A UUID that identifies the view, generated when the view is created. 
Implementations must throw an exception if a view's UUID does not match the expected UUID after refreshing metadata | | _required_ | `format-version` | An integer version number for the view format; must be 1 | | _required_ | `location` | The view's base location; used to create metadata file locations | | _required_ | `schemas` | A list of known schemas | @@ -192,6 +193,7 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00001-(uuid).metadata.json ``` ``` { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version" : 1, "location" : "s3://bucket/warehouse/default.db/event_agg", "current-version-id" : 1, @@ -259,6 +261,7 @@ s3://bucket/warehouse/default.db/event_agg/metadata/00002-(uuid).metadata.json ``` ``` { + "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385", "format-version" : 1, "location" : "s3://bucket/warehouse/default.db/event_agg", "current-version-id" : 1, diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml index d562ff7e799b..2deb6c3c2d6f 100644 --- a/gradle/libs.versions.toml +++ b/gradle/libs.versions.toml @@ -47,7 +47,7 @@ kryo-shaded = "4.0.3" microprofile-openapi-api = "3.1.1" mockito = "4.11.0" mockserver = "5.15.0" -nessie = "0.67.0" +nessie = "0.71.0" netty-buffer = "4.1.97.Final" netty-buffer-compat = "4.1.68.Final" object-client-bundle = "3.3.2" diff --git a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java index 6b06c33a730b..7ff2bd78a665 100644 --- a/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java +++ b/hive-metastore/src/test/java/org/apache/iceberg/hive/TestHiveCatalog.java @@ -124,6 +124,11 @@ public void testCreateTableBuilder() throws Exception { assertThat(table.spec().fields()).hasSize(1); assertThat(table.properties()).containsEntry("key1", "value1"); assertThat(table.properties()).containsEntry("key2", "value2"); + // default Parquet compression is explicitly set for new tables + assertThat(table.properties()) + .containsEntry( + TableProperties.PARQUET_COMPRESSION, + TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0); } finally { catalog.dropTable(tableIdent); } @@ -146,6 +151,11 @@ public void testCreateTableWithCaching() throws Exception { assertThat(table.spec().fields()).hasSize(1); assertThat(table.properties()).containsEntry("key1", "value1"); assertThat(table.properties()).containsEntry("key2", "value2"); + // default Parquet compression is explicitly set for new tables + assertThat(table.properties()) + .containsEntry( + TableProperties.PARQUET_COMPRESSION, + TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0); } finally { cachingCatalog.dropTable(tableIdent); } diff --git a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java index dba1994aec96..f849163acc6a 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java +++ b/mr/src/test/java/org/apache/iceberg/mr/TestCatalogs.java @@ -19,9 +19,9 @@ package org.apache.iceberg.mr; import static org.apache.iceberg.types.Types.NestedField.required; +import static org.assertj.core.api.Assertions.assertThat; import java.io.IOException; -import java.util.Collections; import java.util.Optional; import java.util.Properties; import org.apache.hadoop.conf.Configuration; @@ -126,7 +126,7 @@ public void testCreateDropTableToLocation() throws IOException { Assert.assertEquals(properties.getProperty("location"), table.location()); 
Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema())); Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); - Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); + assertThat(table.properties()).containsEntry("dummy", "test"); Assertions.assertThatThrownBy(() -> Catalogs.dropTable(conf, new Properties())) .isInstanceOf(NullPointerException.class) @@ -178,7 +178,7 @@ public void testCreateDropTableToCatalog() throws IOException { Assert.assertEquals(SchemaParser.toJson(SCHEMA), SchemaParser.toJson(table.schema())); Assert.assertEquals(PartitionSpecParser.toJson(SPEC), PartitionSpecParser.toJson(table.spec())); - Assert.assertEquals(Collections.singletonMap("dummy", "test"), table.properties()); + assertThat(table.properties()).containsEntry("dummy", "test"); Assertions.assertThatThrownBy(() -> Catalogs.dropTable(conf, new Properties())) .isInstanceOf(NullPointerException.class) diff --git a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java index 775ecbffe109..81e2ffcc84da 100644 --- a/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java +++ b/mr/src/test/java/org/apache/iceberg/mr/hive/TestHiveIcebergStorageHandlerNoScan.java @@ -759,6 +759,9 @@ public void testIcebergAndHmsTableProperties() throws Exception { expectedIcebergProperties.put("custom_property", "initial_val"); expectedIcebergProperties.put("EXTERNAL", "TRUE"); expectedIcebergProperties.put("storage_handler", HiveIcebergStorageHandler.class.getName()); + expectedIcebergProperties.put( + TableProperties.PARQUET_COMPRESSION, + TableProperties.PARQUET_COMPRESSION_DEFAULT_SINCE_1_4_0); // Check the HMS table parameters org.apache.hadoop.hive.metastore.api.Table hmsTable = @@ -779,7 +782,7 @@ public void testIcebergAndHmsTableProperties() throws Exception { Assert.assertEquals(expectedIcebergProperties, icebergTable.properties()); if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) { - Assert.assertEquals(13, hmsParams.size()); + Assert.assertEquals(14, hmsParams.size()); Assert.assertEquals("initial_val", hmsParams.get("custom_property")); Assert.assertEquals("TRUE", hmsParams.get(InputFormatConfig.EXTERNAL_TABLE_PURGE)); Assert.assertEquals("TRUE", hmsParams.get("EXTERNAL")); @@ -824,8 +827,8 @@ public void testIcebergAndHmsTableProperties() throws Exception { .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); if (Catalogs.hiveCatalog(shell.getHiveConf(), tableProperties)) { - Assert.assertEquals( - 16, hmsParams.size()); // 2 newly-added properties + previous_metadata_location prop + // 2 newly-added properties + previous_metadata_location prop + explicit Parquet compression + Assert.assertEquals(17, hmsParams.size()); Assert.assertEquals("true", hmsParams.get("new_prop_1")); Assert.assertEquals("false", hmsParams.get("new_prop_2")); Assert.assertEquals("new_val", hmsParams.get("custom_property")); diff --git a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java index b02df3d7596d..46c8adca3e5f 100644 --- a/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java +++ b/nessie/src/main/java/org/apache/iceberg/nessie/NessieCatalog.java @@ -46,6 +46,8 @@ import org.projectnessie.client.NessieConfigConstants; import org.projectnessie.client.api.NessieApiV1; 
import org.projectnessie.client.api.NessieApiV2; +import org.projectnessie.client.config.NessieClientConfigSource; +import org.projectnessie.client.config.NessieClientConfigSources; import org.projectnessie.client.http.HttpClientBuilder; import org.projectnessie.model.ContentKey; import org.projectnessie.model.TableReference; @@ -94,10 +96,11 @@ public void initialize(String name, Map options) { String requestedHash = options.get(removePrefix.apply(NessieConfigConstants.CONF_NESSIE_REF_HASH)); - NessieClientBuilder nessieClientBuilder = - createNessieClientBuilder( - options.get(NessieConfigConstants.CONF_NESSIE_CLIENT_BUILDER_IMPL)) - .fromConfig(x -> options.get(removePrefix.apply(x))); + NessieClientConfigSource configSource = + NessieClientConfigSources.mapConfigSource(options) + .fallbackTo(x -> options.get(removePrefix.apply(x))); + NessieClientBuilder nessieClientBuilder = + NessieClientBuilder.createClientBuilderFromSystemSettings(configSource); // default version is set to v1. final String apiVersion = options.getOrDefault(removePrefix.apply(NessieUtil.CLIENT_API_VERSION), "1"); @@ -182,8 +185,8 @@ private String validateWarehouseLocation(String name, Map catalo return warehouseLocation; } - private static NessieClientBuilder createNessieClientBuilder(String customBuilder) { - NessieClientBuilder clientBuilder; + private static NessieClientBuilder createNessieClientBuilder(String customBuilder) { + NessieClientBuilder clientBuilder; if (customBuilder != null) { try { clientBuilder = diff --git a/nessie/src/test/java/org/apache/iceberg/nessie/TestCustomNessieClient.java b/nessie/src/test/java/org/apache/iceberg/nessie/TestCustomNessieClient.java index 40341358073f..85dc3af8b4a4 100644 --- a/nessie/src/test/java/org/apache/iceberg/nessie/TestCustomNessieClient.java +++ b/nessie/src/test/java/org/apache/iceberg/nessie/TestCustomNessieClient.java @@ -20,17 +20,14 @@ import static org.assertj.core.api.Assertions.assertThatThrownBy; -import java.net.URI; -import java.util.function.Function; import org.apache.iceberg.CatalogProperties; import org.apache.iceberg.TestCatalogUtil; import org.apache.iceberg.io.FileIO; import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.junit.jupiter.api.Test; -import org.projectnessie.client.NessieClientBuilder; +import org.projectnessie.client.NessieClientBuilder.AbstractNessieClientBuilder; import org.projectnessie.client.NessieConfigConstants; import org.projectnessie.client.api.NessieApi; -import org.projectnessie.client.auth.NessieAuthentication; import org.projectnessie.client.http.HttpClientBuilder; public class TestCustomNessieClient extends BaseTestIceberg { @@ -71,7 +68,6 @@ public void testUnnecessaryDefaultCustomClient() { @Test public void testNonExistentCustomClient() { - String nonExistingClass = "non.existent.ClientBuilderImpl"; assertThatThrownBy( () -> { NessieCatalog catalog = new NessieCatalog(); @@ -83,14 +79,14 @@ public void testNonExistentCustomClient() { CatalogProperties.URI, uri, NessieConfigConstants.CONF_NESSIE_CLIENT_BUILDER_IMPL, - nonExistingClass)); + "non.existent.ClientBuilderImpl")); }) .isInstanceOf(RuntimeException.class) - .hasMessageContaining(nonExistingClass); + .hasMessageContaining("Cannot load Nessie client builder implementation class"); } @Test - public void testCustomClient() { + public void testCustomClientByImpl() { assertThatThrownBy( () -> { NessieCatalog catalog = new NessieCatalog(); @@ -108,6 +104,25 @@ public void testCustomClient() { .hasMessage("BUILD CALLED"); } + 
@Test + public void testCustomClientByName() { + assertThatThrownBy( + () -> { + NessieCatalog catalog = new NessieCatalog(); + catalog.initialize( + "nessie", + ImmutableMap.of( + CatalogProperties.WAREHOUSE_LOCATION, + temp.toUri().toString(), + CatalogProperties.URI, + uri, + NessieConfigConstants.CONF_NESSIE_CLIENT_NAME, + "Dummy")); + }) + .isInstanceOf(RuntimeException.class) + .hasMessage("BUILD CALLED"); + } + @Test public void testAlternativeInitializeWithNulls() { NessieCatalog catalog = new NessieCatalog(); @@ -128,7 +143,7 @@ public void testAlternativeInitializeWithNulls() { } @SuppressWarnings("rawtypes") - public static final class DummyClientBuilderImpl implements NessieClientBuilder { + public static final class DummyClientBuilderImpl extends AbstractNessieClientBuilder { @SuppressWarnings("unused") public static DummyClientBuilderImpl builder() { @@ -136,33 +151,18 @@ public static DummyClientBuilderImpl builder() { } @Override - public NessieClientBuilder fromSystemProperties() { - return this; - } - - @Override - public NessieClientBuilder withAuthentication(NessieAuthentication authentication) { - return this; - } - - @Override - public NessieClientBuilder withUri(URI uri) { - return this; - } - - @Override - public NessieClientBuilder withAuthenticationFromConfig(Function configuration) { - return this; + public A build(Class apiContract) { + throw new RuntimeException("BUILD CALLED"); } @Override - public NessieClientBuilder fromConfig(Function configuration) { - return this; + public String name() { + return "Dummy"; } @Override - public NessieApi build(Class apiContract) { - throw new RuntimeException("BUILD CALLED"); + public int priority() { + return 42; } } } diff --git a/nessie/src/test/resources/META-INF/services/org.projectnessie.client.NessieClientBuilder b/nessie/src/test/resources/META-INF/services/org.projectnessie.client.NessieClientBuilder new file mode 100644 index 000000000000..289944f550ea --- /dev/null +++ b/nessie/src/test/resources/META-INF/services/org.projectnessie.client.NessieClientBuilder @@ -0,0 +1,16 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +org.apache.iceberg.nessie.TestCustomNessieClient$DummyClientBuilderImpl diff --git a/python/.pre-commit-config.yaml b/python/.pre-commit-config.yaml index 49785f19de47..3e6c6b73b75d 100644 --- a/python/.pre-commit-config.yaml +++ b/python/.pre-commit-config.yaml @@ -91,7 +91,7 @@ repos: - id: pydocstyle args: [ - "--ignore=D100,D102,D101,D103,D104,D106,D107,D203,D212,D213,D404,D405,D406,D407,D411,D413,D415,D417", + "--ignore=D100,D102,D101,D103,D104,D107,D203,D212,D213,D404,D405,D406,D407,D411,D413,D415,D417", ] additional_dependencies: - tomli==2.0.1 diff --git a/python/poetry.lock b/python/poetry.lock index b1232e2f34df..87d41ce6da77 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -2,13 +2,13 @@ [[package]] name = "adlfs" -version = "2023.8.0" +version = "2023.9.0" description = "Access Azure Datalake Gen1 with fsspec and dask" optional = true python-versions = ">=3.8" files = [ - {file = "adlfs-2023.8.0-py3-none-any.whl", hash = "sha256:3eb248a3c2a30b419f1147bd7676d156b5219f96ef7f11d47166afd2a3bdb07e"}, - {file = "adlfs-2023.8.0.tar.gz", hash = "sha256:07e804f6df4593acfcaf01025b162e30ac13e523d3570279c98b2d91a18026d9"}, + {file = "adlfs-2023.9.0-py3-none-any.whl", hash = "sha256:e2cff62b8128578c6d1b9da1660ad4c8a5a8cb0d491bba416b529563c65dc5d2"}, + {file = "adlfs-2023.9.0.tar.gz", hash = "sha256:1ce70ffa39f7cffc3efbbd9f79b444958eb5d9de9981442b06e47472d2089d4b"}, ] [package.dependencies] @@ -17,7 +17,7 @@ azure-core = ">=1.23.1,<2.0.0" azure-datalake-store = ">=0.0.46,<0.1" azure-identity = "*" azure-storage-blob = ">=12.12.0" -fsspec = ">=2021.10.1" +fsspec = ">=2023.9.0" [package.extras] docs = ["furo", "myst-parser", "numpydoc", "sphinx"] @@ -224,13 +224,13 @@ tests-no-zope = ["cloudpickle", "hypothesis", "mypy (>=1.1.1)", "pympler", "pyte [[package]] name = "azure-core" -version = "1.29.3" +version = "1.29.4" description = "Microsoft Azure Core Library for Python" optional = true python-versions = ">=3.7" files = [ - {file = "azure-core-1.29.3.tar.gz", hash = "sha256:c92700af982e71c8c73de9f4c20da8b3f03ce2c22d13066e4d416b4629c87903"}, - {file = "azure_core-1.29.3-py3-none-any.whl", hash = "sha256:f8b2910f92b66293d93bd00564924ad20ad48f4a1e150577cf18d1e7d4f9263c"}, + {file = "azure-core-1.29.4.tar.gz", hash = "sha256:500b3aa9bf2e90c5ccc88bb105d056114ca0ce7d0ce73afb8bc4d714b2fc7568"}, + {file = "azure_core-1.29.4-py3-none-any.whl", hash = "sha256:b03261bcba22c0b9290faf9999cedd23e849ed2577feee90515694cea6bc74bf"}, ] [package.dependencies] @@ -276,13 +276,13 @@ msal-extensions = ">=0.3.0,<2.0.0" [[package]] name = "azure-storage-blob" -version = "12.17.0" +version = "12.18.1" description = "Microsoft Azure Blob Storage Client Library for Python" optional = true python-versions = ">=3.7" files = [ - {file = "azure-storage-blob-12.17.0.zip", hash = "sha256:c14b785a17050b30fc326a315bdae6bc4a078855f4f94a4c303ad74a48dc8c63"}, - {file = "azure_storage_blob-12.17.0-py3-none-any.whl", hash = "sha256:0016e0c549a80282d7b4920c03f2f4ba35c53e6e3c7dbcd2a4a8c8eb3882c1e7"}, + {file = "azure-storage-blob-12.18.1.tar.gz", hash = "sha256:d3265c2403c28d8881326c365e9cf7ed2ad55fdac98404eae753548702b31ba2"}, + {file = "azure_storage_blob-12.18.1-py3-none-any.whl", hash = "sha256:00b92568e91d608c04dfd4814c3b180818e690023493bb984c22dfc1a8a96e55"}, ] [package.dependencies] @@ -334,13 +334,13 @@ crt = ["awscrt (==0.16.26)"] [[package]] name = "build" -version = "1.0.0" +version = "1.0.3" description = "A simple, correct Python build frontend" optional = false python-versions = ">= 3.7" files = [ - {file 
= "build-1.0.0-py3-none-any.whl", hash = "sha256:f4c7b45e70e2c345e673902253d435a9a7729ff09ab574924420cf120c60bcc9"}, - {file = "build-1.0.0.tar.gz", hash = "sha256:49a60f212df4d9925727c2118e1cbe3abf30b393eff7d0e7287d2170eb36844d"}, + {file = "build-1.0.3-py3-none-any.whl", hash = "sha256:589bf99a67df7c9cf07ec0ac0e5e2ea5d4b37ac63301c4986d1acb126aa83f8f"}, + {file = "build-1.0.3.tar.gz", hash = "sha256:538aab1b64f9828977f84bc63ae570b060a8ed1be419e7870b8b4fc5e6ea553b"}, ] [package.dependencies] @@ -643,34 +643,34 @@ toml = ["tomli"] [[package]] name = "cryptography" -version = "41.0.3" +version = "41.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:652627a055cb52a84f8c448185922241dd5217443ca194d5739b44612c5e6507"}, - {file = "cryptography-41.0.3-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:8f09daa483aedea50d249ef98ed500569841d6498aa9c9f4b0531b9964658922"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4fd871184321100fb400d759ad0cddddf284c4b696568204d281c902fc7b0d81"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:84537453d57f55a50a5b6835622ee405816999a7113267739a1b4581f83535bd"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:3fb248989b6363906827284cd20cca63bb1a757e0a2864d4c1682a985e3dca47"}, - {file = "cryptography-41.0.3-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:42cb413e01a5d36da9929baa9d70ca90d90b969269e5a12d39c1e0d475010116"}, - {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:aeb57c421b34af8f9fe830e1955bf493a86a7996cc1338fe41b30047d16e962c"}, - {file = "cryptography-41.0.3-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6af1c6387c531cd364b72c28daa29232162010d952ceb7e5ca8e2827526aceae"}, - {file = "cryptography-41.0.3-cp37-abi3-win32.whl", hash = "sha256:0d09fb5356f975974dbcb595ad2d178305e5050656affb7890a1583f5e02a306"}, - {file = "cryptography-41.0.3-cp37-abi3-win_amd64.whl", hash = "sha256:a983e441a00a9d57a4d7c91b3116a37ae602907a7618b882c8013b5762e80574"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5259cb659aa43005eb55a0e4ff2c825ca111a0da1814202c64d28a985d33b087"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:67e120e9a577c64fe1f611e53b30b3e69744e5910ff3b6e97e935aeb96005858"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:7efe8041897fe7a50863e51b77789b657a133c75c3b094e51b5e4b5cec7bf906"}, - {file = "cryptography-41.0.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce785cf81a7bdade534297ef9e490ddff800d956625020ab2ec2780a556c313e"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:57a51b89f954f216a81c9d057bf1a24e2f36e764a1ca9a501a6964eb4a6800dd"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:4c2f0d35703d61002a2bbdcf15548ebb701cfdd83cdc12471d2bae80878a4207"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:23c2d778cf829f7d0ae180600b17e9fceea3c2ef8b31a99e3c694cbbf3a24b84"}, - {file = "cryptography-41.0.3-pp38-pypy38_pp73-win_amd64.whl", hash = 
"sha256:95dd7f261bb76948b52a5330ba5202b91a26fbac13ad0e9fc8a3ac04752058c7"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:41d7aa7cdfded09b3d73a47f429c298e80796c8e825ddfadc84c8a7f12df212d"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d0d651aa754ef58d75cec6edfbd21259d93810b73f6ec246436a21b7841908de"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:ab8de0d091acbf778f74286f4989cf3d1528336af1b59f3e5d2ebca8b5fe49e1"}, - {file = "cryptography-41.0.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:a74fbcdb2a0d46fe00504f571a2a540532f4c188e6ccf26f1f178480117b33c4"}, - {file = "cryptography-41.0.3.tar.gz", hash = "sha256:6d192741113ef5e30d89dcb5b956ef4e1578f304708701b8b73d38e3e1461f34"}, + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:80907d3faa55dc5434a16579952ac6da800935cd98d14dbd62f6f042c7f5e839"}, + {file = "cryptography-41.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:35c00f637cd0b9d5b6c6bd11b6c3359194a8eba9c46d4e875a3660e3b400005f"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cecfefa17042941f94ab54f769c8ce0fe14beff2694e9ac684176a2535bf9714"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e40211b4923ba5a6dc9769eab704bdb3fbb58d56c5b336d30996c24fcf12aadb"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:23a25c09dfd0d9f28da2352503b23e086f8e78096b9fd585d1d14eca01613e13"}, + {file = "cryptography-41.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2ed09183922d66c4ec5fdaa59b4d14e105c084dd0febd27452de8f6f74704143"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:5a0f09cefded00e648a127048119f77bc2b2ec61e736660b5789e638f43cc397"}, + {file = "cryptography-41.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:9eeb77214afae972a00dee47382d2591abe77bdae166bda672fb1e24702a3860"}, + {file = "cryptography-41.0.4-cp37-abi3-win32.whl", hash = "sha256:3b224890962a2d7b57cf5eeb16ccaafba6083f7b811829f00476309bce2fe0fd"}, + {file = "cryptography-41.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c880eba5175f4307129784eca96f4e70b88e57aa3f680aeba3bab0e980b0f37d"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:004b6ccc95943f6a9ad3142cfabcc769d7ee38a3f60fb0dddbfb431f818c3a67"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:86defa8d248c3fa029da68ce61fe735432b047e32179883bdb1e79ed9bb8195e"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:37480760ae08065437e6573d14be973112c9e6dcaf5f11d00147ee74f37a3829"}, + {file = "cryptography-41.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b5f4dfe950ff0479f1f00eda09c18798d4f49b98f4e2006d644b3301682ebdca"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7e53db173370dea832190870e975a1e09c86a879b613948f09eb49324218c14d"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5b72205a360f3b6176485a333256b9bcd48700fc755fef51c8e7e67c4b63e3ac"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:93530900d14c37a46ce3d6c9e6fd35dbe5f5601bf6b3a5c325c7bffc030344d9"}, + {file = "cryptography-41.0.4-pp38-pypy38_pp73-win_amd64.whl", hash = 
"sha256:efc8ad4e6fc4f1752ebfb58aefece8b4e3c4cae940b0994d43649bdfce8d0d4f"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3391bd8e6de35f6f1140e50aaeb3e2b3d6a9012536ca23ab0d9c35ec18c8a91"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:0d9409894f495d465fe6fda92cb70e8323e9648af912d5b9141d616df40a87b8"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:8ac4f9ead4bbd0bc8ab2d318f97d85147167a488be0e08814a37eb2f439d5cf6"}, + {file = "cryptography-41.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:047c4603aeb4bbd8db2756e38f5b8bd7e94318c047cfe4efeb5d715e08b49311"}, + {file = "cryptography-41.0.4.tar.gz", hash = "sha256:7febc3094125fc126a7f6fb1f420d0da639f3f32cb15c8ff0dc3997c4549f51a"}, ] [package.dependencies] @@ -903,21 +903,19 @@ zstandard = ["zstandard"] [[package]] name = "filelock" -version = "3.12.3" +version = "3.12.4" description = "A platform independent file lock." optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.12.3-py3-none-any.whl", hash = "sha256:f067e40ccc40f2b48395a80fcbd4728262fab54e232e090a4063ab804179efeb"}, - {file = "filelock-3.12.3.tar.gz", hash = "sha256:0ecc1dd2ec4672a10c8550a8182f1bd0c0a5088470ecd5a125e45f49472fac3d"}, + {file = "filelock-3.12.4-py3-none-any.whl", hash = "sha256:08c21d87ded6e2b9da6728c3dff51baf1dcecf973b768ef35bcbc3447edb9ad4"}, + {file = "filelock-3.12.4.tar.gz", hash = "sha256:2e6f249f1f3654291606e046b09f1fd5eac39b360664c27f5aad072012f8bcbd"}, ] -[package.dependencies] -typing-extensions = {version = ">=4.7.1", markers = "python_version < \"3.11\""} - [package.extras] docs = ["furo (>=2023.7.26)", "sphinx (>=7.1.2)", "sphinx-autodoc-typehints (>=1.24)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.3)", "diff-cover (>=7.7)", "pytest (>=7.4)", "pytest-cov (>=4.1)", "pytest-mock (>=3.11.1)", "pytest-timeout (>=2.1)"] +typing = ["typing-extensions (>=4.7.1)"] [[package]] name = "frozenlist" @@ -991,13 +989,13 @@ files = [ [[package]] name = "fsspec" -version = "2023.9.0" +version = "2023.9.1" description = "File-system specification" optional = false python-versions = ">=3.8" files = [ - {file = "fsspec-2023.9.0-py3-none-any.whl", hash = "sha256:d55b9ab2a4c1f2b759888ae9f93e40c2aa72c0808132e87e282b549f9e6c4254"}, - {file = "fsspec-2023.9.0.tar.gz", hash = "sha256:4dbf0fefee035b7c6d3bbbe6bc99b2f201f40d4dca95b67c2b719be77bcd917f"}, + {file = "fsspec-2023.9.1-py3-none-any.whl", hash = "sha256:99a974063b6cced36cfaa61aa8efb05439c6fea2dafe65930e7ab46f9d2f8930"}, + {file = "fsspec-2023.9.1.tar.gz", hash = "sha256:da8cfe39eeb65aaa69074d5e0e4bbc9b7ef72d69c0587a31cab981eefdb3da13"}, ] [package.extras] @@ -1026,19 +1024,19 @@ tqdm = ["tqdm"] [[package]] name = "gcsfs" -version = "2023.9.0" +version = "2023.9.1" description = "Convenient Filesystem interface over GCS" optional = true python-versions = ">=3.8" files = [ - {file = "gcsfs-2023.9.0-py2.py3-none-any.whl", hash = "sha256:6c4cd07b8e6ff1c9edafadefa7559b10b60e7d684ddc4a7f172c55b5b09ddd62"}, - {file = "gcsfs-2023.9.0.tar.gz", hash = "sha256:1e098c7b7d383e0e4779306b5db81699c6c738964ca1b00fe92cee8d6ff0b788"}, + {file = "gcsfs-2023.9.1-py2.py3-none-any.whl", hash = "sha256:c673caf901fc923d121399782394aba9c99dac4332a15ba21bd320d0b7f46521"}, + {file = "gcsfs-2023.9.1.tar.gz", hash = "sha256:47698bba0468896bfb33749552fe459fa745069119d7c2605a53bf4f3a1f09ac"}, ] [package.dependencies] aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" decorator = ">4.1.2" 
-fsspec = "2023.9.0" +fsspec = "2023.9.1" google-auth = ">=1.2" google-auth-oauthlib = "*" google-cloud-storage = "*" @@ -1072,20 +1070,19 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.22.0" +version = "2.23.0" description = "Google Authentication Library" optional = true -python-versions = ">=3.6" +python-versions = ">=3.7" files = [ - {file = "google-auth-2.22.0.tar.gz", hash = "sha256:164cba9af4e6e4e40c3a4f90a1a6c12ee56f14c0b4868d1ca91b32826ab334ce"}, - {file = "google_auth-2.22.0-py2.py3-none-any.whl", hash = "sha256:d61d1b40897407b574da67da1a833bdc10d5a11642566e506565d1b1a46ba873"}, + {file = "google-auth-2.23.0.tar.gz", hash = "sha256:753a26312e6f1eaeec20bc6f2644a10926697da93446e1f8e24d6d32d45a922a"}, + {file = "google_auth-2.23.0-py2.py3-none-any.whl", hash = "sha256:2cec41407bd1e207f5b802638e32bb837df968bb5c05f413d0fa526fac4cf7a7"}, ] [package.dependencies] cachetools = ">=2.0.0,<6.0" pyasn1-modules = ">=0.2.1" rsa = ">=3.1.4,<5" -six = ">=1.9.0" urllib3 = "<2.0" [package.extras] @@ -1097,13 +1094,13 @@ requests = ["requests (>=2.20.0,<3.0.0.dev0)"] [[package]] name = "google-auth-oauthlib" -version = "1.0.0" +version = "1.1.0" description = "Google Authentication Library" optional = true python-versions = ">=3.6" files = [ - {file = "google-auth-oauthlib-1.0.0.tar.gz", hash = "sha256:e375064964820b47221a7e1b7ee1fd77051b6323c3f9e3e19785f78ab67ecfc5"}, - {file = "google_auth_oauthlib-1.0.0-py2.py3-none-any.whl", hash = "sha256:95880ca704928c300f48194d1770cf5b1462835b6e49db61445a520f793fd5fb"}, + {file = "google-auth-oauthlib-1.1.0.tar.gz", hash = "sha256:83ea8c3b0881e453790baff4448e8a6112ac8778d1de9da0b68010b843937afb"}, + {file = "google_auth_oauthlib-1.1.0-py2.py3-none-any.whl", hash = "sha256:089c6e587d36f4803ac7e0720c045c6a8b1fd1790088b8424975b90d0ee61c12"}, ] [package.dependencies] @@ -1133,20 +1130,20 @@ grpc = ["grpcio (>=1.38.0,<2.0dev)"] [[package]] name = "google-cloud-storage" -version = "2.10.0" +version = "2.11.0" description = "Google Cloud Storage API client library" optional = true python-versions = ">=3.7" files = [ - {file = "google-cloud-storage-2.10.0.tar.gz", hash = "sha256:934b31ead5f3994e5360f9ff5750982c5b6b11604dc072bc452c25965e076dc7"}, - {file = "google_cloud_storage-2.10.0-py2.py3-none-any.whl", hash = "sha256:9433cf28801671de1c80434238fb1e7e4a1ba3087470e90f70c928ea77c2b9d7"}, + {file = "google-cloud-storage-2.11.0.tar.gz", hash = "sha256:6fbf62659b83c8f3a0a743af0d661d2046c97c3a5bfb587c4662c4bc68de3e31"}, + {file = "google_cloud_storage-2.11.0-py2.py3-none-any.whl", hash = "sha256:88cbd7fb3d701c780c4272bc26952db99f25eb283fb4c2208423249f00b5fe53"}, ] [package.dependencies] google-api-core = ">=1.31.5,<2.0.dev0 || >2.3.0,<3.0.0dev" google-auth = ">=1.25.0,<3.0dev" google-cloud-core = ">=2.3.0,<3.0dev" -google-resumable-media = ">=2.3.2" +google-resumable-media = ">=2.6.0" requests = ">=2.18.0,<3.0.0dev" [package.extras] @@ -1234,20 +1231,20 @@ testing = ["pytest"] [[package]] name = "google-resumable-media" -version = "2.5.0" +version = "2.6.0" description = "Utilities for Google Media Downloads and Resumable Uploads" optional = true python-versions = ">= 3.7" files = [ - {file = "google-resumable-media-2.5.0.tar.gz", hash = "sha256:218931e8e2b2a73a58eb354a288e03a0fd5fb1c4583261ac6e4c078666468c93"}, - {file = "google_resumable_media-2.5.0-py2.py3-none-any.whl", hash = "sha256:da1bd943e2e114a56d85d6848497ebf9be6a14d3db23e9fc57581e7c3e8170ec"}, + {file = "google-resumable-media-2.6.0.tar.gz", hash = 
"sha256:972852f6c65f933e15a4a210c2b96930763b47197cdf4aa5f5bea435efb626e7"}, + {file = "google_resumable_media-2.6.0-py2.py3-none-any.whl", hash = "sha256:fc03d344381970f79eebb632a3c18bb1828593a2dc5572b5f90115ef7d11e81b"}, ] [package.dependencies] google-crc32c = ">=1.0,<2.0dev" [package.extras] -aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)"] +aiohttp = ["aiohttp (>=3.6.2,<4.0.0dev)", "google-auth (>=1.22.0,<2.0dev)"] requests = ["requests (>=2.18.0,<3.0.0dev)"] [[package]] @@ -1344,72 +1341,15 @@ files = [ docs = ["Sphinx", "docutils (<0.18)"] test = ["objgraph", "psutil"] -[[package]] -name = "grpcio" -version = "1.57.0" -description = "HTTP/2-based RPC framework" -optional = true -python-versions = ">=3.7" -files = [ - {file = "grpcio-1.57.0-cp310-cp310-linux_armv7l.whl", hash = "sha256:092fa155b945015754bdf988be47793c377b52b88d546e45c6a9f9579ac7f7b6"}, - {file = "grpcio-1.57.0-cp310-cp310-macosx_12_0_universal2.whl", hash = "sha256:2f7349786da979a94690cc5c2b804cab4e8774a3cf59be40d037c4342c906649"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_aarch64.whl", hash = "sha256:82640e57fb86ea1d71ea9ab54f7e942502cf98a429a200b2e743d8672171734f"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:40b72effd4c789de94ce1be2b5f88d7b9b5f7379fe9645f198854112a6567d9a"}, - {file = "grpcio-1.57.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2f708a6a17868ad8bf586598bee69abded4996b18adf26fd2d91191383b79019"}, - {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:60fe15288a0a65d5c1cb5b4a62b1850d07336e3ba728257a810317be14f0c527"}, - {file = "grpcio-1.57.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6907b1cf8bb29b058081d2aad677b15757a44ef2d4d8d9130271d2ad5e33efca"}, - {file = "grpcio-1.57.0-cp310-cp310-win32.whl", hash = "sha256:57b183e8b252825c4dd29114d6c13559be95387aafc10a7be645462a0fc98bbb"}, - {file = "grpcio-1.57.0-cp310-cp310-win_amd64.whl", hash = "sha256:7b400807fa749a9eb286e2cd893e501b110b4d356a218426cb9c825a0474ca56"}, - {file = "grpcio-1.57.0-cp311-cp311-linux_armv7l.whl", hash = "sha256:c6ebecfb7a31385393203eb04ed8b6a08f5002f53df3d59e5e795edb80999652"}, - {file = "grpcio-1.57.0-cp311-cp311-macosx_10_10_universal2.whl", hash = "sha256:00258cbe3f5188629828363ae8ff78477ce976a6f63fb2bb5e90088396faa82e"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_aarch64.whl", hash = "sha256:23e7d8849a0e58b806253fd206ac105b328171e01b8f18c7d5922274958cc87e"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5371bcd861e679d63b8274f73ac281751d34bd54eccdbfcd6aa00e692a82cd7b"}, - {file = "grpcio-1.57.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:aed90d93b731929e742967e236f842a4a2174dc5db077c8f9ad2c5996f89f63e"}, - {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:fe752639919aad9ffb0dee0d87f29a6467d1ef764f13c4644d212a9a853a078d"}, - {file = "grpcio-1.57.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:fada6b07ec4f0befe05218181f4b85176f11d531911b64c715d1875c4736d73a"}, - {file = "grpcio-1.57.0-cp311-cp311-win32.whl", hash = "sha256:bb396952cfa7ad2f01061fbc7dc1ad91dd9d69243bcb8110cf4e36924785a0fe"}, - {file = "grpcio-1.57.0-cp311-cp311-win_amd64.whl", hash = "sha256:e503cb45ed12b924b5b988ba9576dc9949b2f5283b8e33b21dcb6be74a7c58d0"}, - {file = "grpcio-1.57.0-cp37-cp37m-linux_armv7l.whl", hash = "sha256:fd173b4cf02b20f60860dc2ffe30115c18972d7d6d2d69df97ac38dee03be5bf"}, - {file = 
"grpcio-1.57.0-cp37-cp37m-macosx_10_10_universal2.whl", hash = "sha256:d7f8df114d6b4cf5a916b98389aeaf1e3132035420a88beea4e3d977e5f267a5"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_aarch64.whl", hash = "sha256:76c44efa4ede1f42a9d5b2fed1fe9377e73a109bef8675fb0728eb80b0b8e8f2"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4faea2cfdf762a664ab90589b66f416274887641ae17817de510b8178356bf73"}, - {file = "grpcio-1.57.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c60b83c43faeb6d0a9831f0351d7787a0753f5087cc6fa218d78fdf38e5acef0"}, - {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b363bbb5253e5f9c23d8a0a034dfdf1b7c9e7f12e602fc788c435171e96daccc"}, - {file = "grpcio-1.57.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:f1fb0fd4a1e9b11ac21c30c169d169ef434c6e9344ee0ab27cfa6f605f6387b2"}, - {file = "grpcio-1.57.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34950353539e7d93f61c6796a007c705d663f3be41166358e3d88c45760c7d98"}, - {file = "grpcio-1.57.0-cp38-cp38-linux_armv7l.whl", hash = "sha256:871f9999e0211f9551f368612460442a5436d9444606184652117d6a688c9f51"}, - {file = "grpcio-1.57.0-cp38-cp38-macosx_10_10_universal2.whl", hash = "sha256:a8a8e560e8dbbdf29288872e91efd22af71e88b0e5736b0daf7773c1fecd99f0"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_aarch64.whl", hash = "sha256:2313b124e475aa9017a9844bdc5eafb2d5abdda9d456af16fc4535408c7d6da6"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:b4098b6b638d9e0ca839a81656a2fd4bc26c9486ea707e8b1437d6f9d61c3941"}, - {file = "grpcio-1.57.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5e5b58e32ae14658085c16986d11e99abd002ddbf51c8daae8a0671fffb3467f"}, - {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:0f80bf37f09e1caba6a8063e56e2b87fa335add314cf2b78ebf7cb45aa7e3d06"}, - {file = "grpcio-1.57.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5b7a4ce8f862fe32b2a10b57752cf3169f5fe2915acfe7e6a1e155db3da99e79"}, - {file = "grpcio-1.57.0-cp38-cp38-win32.whl", hash = "sha256:9338bacf172e942e62e5889b6364e56657fbf8ac68062e8b25c48843e7b202bb"}, - {file = "grpcio-1.57.0-cp38-cp38-win_amd64.whl", hash = "sha256:e1cb52fa2d67d7f7fab310b600f22ce1ff04d562d46e9e0ac3e3403c2bb4cc16"}, - {file = "grpcio-1.57.0-cp39-cp39-linux_armv7l.whl", hash = "sha256:fee387d2fab144e8a34e0e9c5ca0f45c9376b99de45628265cfa9886b1dbe62b"}, - {file = "grpcio-1.57.0-cp39-cp39-macosx_10_10_universal2.whl", hash = "sha256:b53333627283e7241fcc217323f225c37783b5f0472316edcaa4479a213abfa6"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_aarch64.whl", hash = "sha256:f19ac6ac0a256cf77d3cc926ef0b4e64a9725cc612f97228cd5dc4bd9dbab03b"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e3fdf04e402f12e1de8074458549337febb3b45f21076cc02ef4ff786aff687e"}, - {file = "grpcio-1.57.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5613a2fecc82f95d6c51d15b9a72705553aa0d7c932fad7aed7afb51dc982ee5"}, - {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:b670c2faa92124b7397b42303e4d8eb64a4cd0b7a77e35a9e865a55d61c57ef9"}, - {file = "grpcio-1.57.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:7a635589201b18510ff988161b7b573f50c6a48fae9cb567657920ca82022b37"}, - {file = "grpcio-1.57.0-cp39-cp39-win32.whl", hash = "sha256:d78d8b86fcdfa1e4c21f8896614b6cc7ee01a2a758ec0c4382d662f2a62cf766"}, - {file = 
"grpcio-1.57.0-cp39-cp39-win_amd64.whl", hash = "sha256:20ec6fc4ad47d1b6e12deec5045ec3cd5402d9a1597f738263e98f490fe07056"}, - {file = "grpcio-1.57.0.tar.gz", hash = "sha256:4b089f7ad1eb00a104078bab8015b0ed0ebcb3b589e527ab009c53893fd4e613"}, -] - -[package.extras] -protobuf = ["grpcio-tools (>=1.57.0)"] - [[package]] name = "identify" -version = "2.5.27" +version = "2.5.29" description = "File identification library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "identify-2.5.27-py2.py3-none-any.whl", hash = "sha256:fdb527b2dfe24602809b2201e033c2a113d7bdf716db3ca8e3243f735dcecaba"}, - {file = "identify-2.5.27.tar.gz", hash = "sha256:287b75b04a0e22d727bc9a41f0d4f3c1bcada97490fa6eabb5b28f0e9097e733"}, + {file = "identify-2.5.29-py2.py3-none-any.whl", hash = "sha256:24437fbf6f4d3fe6efd0eb9d67e24dd9106db99af5ceb27996a5f7895f24bf1b"}, + {file = "identify-2.5.29.tar.gz", hash = "sha256:d43d52b86b15918c137e3a74fff5224f60385cd0e9c38e99d07c257f02f151a5"}, ] [package.extras] @@ -1447,21 +1387,21 @@ testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pyfakefs [[package]] name = "importlib-resources" -version = "6.0.1" +version = "6.1.0" description = "Read resources from Python packages" optional = true python-versions = ">=3.8" files = [ - {file = "importlib_resources-6.0.1-py3-none-any.whl", hash = "sha256:134832a506243891221b88b4ae1213327eea96ceb4e407a00d790bb0626f45cf"}, - {file = "importlib_resources-6.0.1.tar.gz", hash = "sha256:4359457e42708462b9626a04657c6208ad799ceb41e5c58c57ffa0e6a098a5d4"}, + {file = "importlib_resources-6.1.0-py3-none-any.whl", hash = "sha256:aa50258bbfa56d4e33fbd8aa3ef48ded10d1735f11532b8df95388cc6bdb7e83"}, + {file = "importlib_resources-6.1.0.tar.gz", hash = "sha256:9d48dcccc213325e810fd723e7fbb45ccb39f6cf5c31f00cf2b965f5f10f3cb9"}, ] [package.dependencies] zipp = {version = ">=3.1.0", markers = "python_version < \"3.10\""} [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] +testing = ["pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff", "zipp (>=3.17)"] [[package]] name = "iniconfig" @@ -1518,13 +1458,13 @@ files = [ [[package]] name = "jsonschema" -version = "4.19.0" +version = "4.19.1" description = "An implementation of JSON Schema validation for Python" optional = true python-versions = ">=3.8" files = [ - {file = "jsonschema-4.19.0-py3-none-any.whl", hash = "sha256:043dc26a3845ff09d20e4420d6012a9c91c9aa8999fa184e7efcfeccb41e32cb"}, - {file = "jsonschema-4.19.0.tar.gz", hash = "sha256:6e1e7569ac13be8139b2dd2c21a55d350066ee3f80df06c608b398cdc6f30e8f"}, + {file = "jsonschema-4.19.1-py3-none-any.whl", hash = "sha256:cd5f1f9ed9444e554b38ba003af06c0a8c2868131e56bfbef0550fb450c0330e"}, + {file = "jsonschema-4.19.1.tar.gz", hash = "sha256:ec84cc37cfa703ef7cd4928db24f9cb31428a5d0fa77747b8b51a847458e0bbf"}, ] [package.dependencies] @@ -1751,13 +1691,13 @@ xray = ["aws-xray-sdk (>=0.93,!=0.96)", "setuptools"] [[package]] name = "msal" -version = "1.23.0" -description = "The Microsoft Authentication Library (MSAL) for Python library enables 
your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." +version = "1.24.0" +description = "The Microsoft Authentication Library (MSAL) for Python library" optional = true -python-versions = "*" +python-versions = ">=2.7" files = [ - {file = "msal-1.23.0-py2.py3-none-any.whl", hash = "sha256:3342e0837a047007f9d479e814b559c3219767453d57920dc40a31986862048b"}, - {file = "msal-1.23.0.tar.gz", hash = "sha256:25c9a33acf84301f93d1fdbe9f1a9c60cd38af0d5fffdbfa378138fc7bc1e86b"}, + {file = "msal-1.24.0-py2.py3-none-any.whl", hash = "sha256:a7f2f342b80ba3fe168218003b6798cc81b83c9745284bf63fb8d4ec8e2dbc50"}, + {file = "msal-1.24.0.tar.gz", hash = "sha256:7d2ecdad41a5f73bb2b813f3061a4cf47c924621105a8ed137586fcb9d8f827e"}, ] [package.dependencies] @@ -1788,74 +1728,67 @@ portalocker = [ [[package]] name = "msgpack" -version = "1.0.5" +version = "1.0.6" description = "MessagePack serializer" optional = true -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:525228efd79bb831cf6830a732e2e80bc1b05436b086d4264814b4b2955b2fa9"}, - {file = "msgpack-1.0.5-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:4f8d8b3bf1ff2672567d6b5c725a1b347fe838b912772aa8ae2bf70338d5a198"}, - {file = "msgpack-1.0.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdc793c50be3f01106245a61b739328f7dccc2c648b501e237f0699fe1395b81"}, - {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5cb47c21a8a65b165ce29f2bec852790cbc04936f502966768e4aae9fa763cb7"}, - {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e42b9594cc3bf4d838d67d6ed62b9e59e201862a25e9a157019e171fbe672dd3"}, - {file = "msgpack-1.0.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:55b56a24893105dc52c1253649b60f475f36b3aa0fc66115bffafb624d7cb30b"}, - {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:1967f6129fc50a43bfe0951c35acbb729be89a55d849fab7686004da85103f1c"}, - {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20a97bf595a232c3ee6d57ddaadd5453d174a52594bf9c21d10407e2a2d9b3bd"}, - {file = "msgpack-1.0.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:d25dd59bbbbb996eacf7be6b4ad082ed7eacc4e8f3d2df1ba43822da9bfa122a"}, - {file = "msgpack-1.0.5-cp310-cp310-win32.whl", hash = "sha256:382b2c77589331f2cb80b67cc058c00f225e19827dbc818d700f61513ab47bea"}, - {file = "msgpack-1.0.5-cp310-cp310-win_amd64.whl", hash = "sha256:4867aa2df9e2a5fa5f76d7d5565d25ec76e84c106b55509e78c1ede0f152659a"}, - {file = "msgpack-1.0.5-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9f5ae84c5c8a857ec44dc180a8b0cc08238e021f57abdf51a8182e915e6299f0"}, - {file = "msgpack-1.0.5-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9e6ca5d5699bcd89ae605c150aee83b5321f2115695e741b99618f4856c50898"}, - {file = "msgpack-1.0.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:5494ea30d517a3576749cad32fa27f7585c65f5f38309c88c6d137877fa28a5a"}, - {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1ab2f3331cb1b54165976a9d976cb251a83183631c88076613c6c780f0d6e45a"}, - {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:28592e20bbb1620848256ebc105fc420436af59515793ed27d5c77a217477705"}, - {file = "msgpack-1.0.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:fe5c63197c55bce6385d9aee16c4d0641684628f63ace85f73571e65ad1c1e8d"}, - {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ed40e926fa2f297e8a653c954b732f125ef97bdd4c889f243182299de27e2aa9"}, - {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:b2de4c1c0538dcb7010902a2b97f4e00fc4ddf2c8cda9749af0e594d3b7fa3d7"}, - {file = "msgpack-1.0.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:bf22a83f973b50f9d38e55c6aade04c41ddda19b00c4ebc558930d78eecc64ed"}, - {file = "msgpack-1.0.5-cp311-cp311-win32.whl", hash = "sha256:c396e2cc213d12ce017b686e0f53497f94f8ba2b24799c25d913d46c08ec422c"}, - {file = "msgpack-1.0.5-cp311-cp311-win_amd64.whl", hash = "sha256:6c4c68d87497f66f96d50142a2b73b97972130d93677ce930718f68828b382e2"}, - {file = "msgpack-1.0.5-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:a2b031c2e9b9af485d5e3c4520f4220d74f4d222a5b8dc8c1a3ab9448ca79c57"}, - {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4f837b93669ce4336e24d08286c38761132bc7ab29782727f8557e1eb21b2080"}, - {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b1d46dfe3832660f53b13b925d4e0fa1432b00f5f7210eb3ad3bb9a13c6204a6"}, - {file = "msgpack-1.0.5-cp36-cp36m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:366c9a7b9057e1547f4ad51d8facad8b406bab69c7d72c0eb6f529cf76d4b85f"}, - {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_aarch64.whl", hash = "sha256:4c075728a1095efd0634a7dccb06204919a2f67d1893b6aa8e00497258bf926c"}, - {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_i686.whl", hash = "sha256:f933bbda5a3ee63b8834179096923b094b76f0c7a73c1cfe8f07ad608c58844b"}, - {file = "msgpack-1.0.5-cp36-cp36m-musllinux_1_1_x86_64.whl", hash = "sha256:36961b0568c36027c76e2ae3ca1132e35123dcec0706c4b7992683cc26c1320c"}, - {file = "msgpack-1.0.5-cp36-cp36m-win32.whl", hash = "sha256:b5ef2f015b95f912c2fcab19c36814963b5463f1fb9049846994b007962743e9"}, - {file = "msgpack-1.0.5-cp36-cp36m-win_amd64.whl", hash = "sha256:288e32b47e67f7b171f86b030e527e302c91bd3f40fd9033483f2cacc37f327a"}, - {file = "msgpack-1.0.5-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:137850656634abddfb88236008339fdaba3178f4751b28f270d2ebe77a563b6c"}, - {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0c05a4a96585525916b109bb85f8cb6511db1c6f5b9d9cbcbc940dc6b4be944b"}, - {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56a62ec00b636583e5cb6ad313bbed36bb7ead5fa3a3e38938503142c72cba4f"}, - {file = "msgpack-1.0.5-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ef8108f8dedf204bb7b42994abf93882da1159728a2d4c5e82012edd92c9da9f"}, - {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1835c84d65f46900920b3708f5ba829fb19b1096c1800ad60bae8418652a951d"}, - {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:e57916ef1bd0fee4f21c4600e9d1da352d8816b52a599c46460e93a6e9f17086"}, - {file = "msgpack-1.0.5-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:17358523b85973e5f242ad74aa4712b7ee560715562554aa2134d96e7aa4cbbf"}, - {file = "msgpack-1.0.5-cp37-cp37m-win32.whl", hash = 
"sha256:cb5aaa8c17760909ec6cb15e744c3ebc2ca8918e727216e79607b7bbce9c8f77"}, - {file = "msgpack-1.0.5-cp37-cp37m-win_amd64.whl", hash = "sha256:ab31e908d8424d55601ad7075e471b7d0140d4d3dd3272daf39c5c19d936bd82"}, - {file = "msgpack-1.0.5-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b72d0698f86e8d9ddf9442bdedec15b71df3598199ba33322d9711a19f08145c"}, - {file = "msgpack-1.0.5-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:379026812e49258016dd84ad79ac8446922234d498058ae1d415f04b522d5b2d"}, - {file = "msgpack-1.0.5-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:332360ff25469c346a1c5e47cbe2a725517919892eda5cfaffe6046656f0b7bb"}, - {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:476a8fe8fae289fdf273d6d2a6cb6e35b5a58541693e8f9f019bfe990a51e4ba"}, - {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a9985b214f33311df47e274eb788a5893a761d025e2b92c723ba4c63936b69b1"}, - {file = "msgpack-1.0.5-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:48296af57cdb1d885843afd73c4656be5c76c0c6328db3440c9601a98f303d87"}, - {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:addab7e2e1fcc04bd08e4eb631c2a90960c340e40dfc4a5e24d2ff0d5a3b3edb"}, - {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:916723458c25dfb77ff07f4c66aed34e47503b2eb3188b3adbec8d8aa6e00f48"}, - {file = "msgpack-1.0.5-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:821c7e677cc6acf0fd3f7ac664c98803827ae6de594a9f99563e48c5a2f27eb0"}, - {file = "msgpack-1.0.5-cp38-cp38-win32.whl", hash = "sha256:1c0f7c47f0087ffda62961d425e4407961a7ffd2aa004c81b9c07d9269512f6e"}, - {file = "msgpack-1.0.5-cp38-cp38-win_amd64.whl", hash = "sha256:bae7de2026cbfe3782c8b78b0db9cbfc5455e079f1937cb0ab8d133496ac55e1"}, - {file = "msgpack-1.0.5-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:20c784e66b613c7f16f632e7b5e8a1651aa5702463d61394671ba07b2fc9e025"}, - {file = "msgpack-1.0.5-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:266fa4202c0eb94d26822d9bfd7af25d1e2c088927fe8de9033d929dd5ba24c5"}, - {file = "msgpack-1.0.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:18334484eafc2b1aa47a6d42427da7fa8f2ab3d60b674120bce7a895a0a85bdd"}, - {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:57e1f3528bd95cc44684beda696f74d3aaa8a5e58c816214b9046512240ef437"}, - {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:586d0d636f9a628ddc6a17bfd45aa5b5efaf1606d2b60fa5d87b8986326e933f"}, - {file = "msgpack-1.0.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a740fa0e4087a734455f0fc3abf5e746004c9da72fbd541e9b113013c8dc3282"}, - {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:3055b0455e45810820db1f29d900bf39466df96ddca11dfa6d074fa47054376d"}, - {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:a61215eac016f391129a013c9e46f3ab308db5f5ec9f25811e811f96962599a8"}, - {file = "msgpack-1.0.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:362d9655cd369b08fda06b6657a303eb7172d5279997abe094512e919cf74b11"}, - {file = "msgpack-1.0.5-cp39-cp39-win32.whl", hash = "sha256:ac9dd47af78cae935901a9a500104e2dea2e253207c924cc95de149606dc43cc"}, - {file = "msgpack-1.0.5-cp39-cp39-win_amd64.whl", hash = "sha256:06f5174b5f8ed0ed919da0e62cbd4ffde676a374aba4020034da05fab67b9164"}, - {file = 
"msgpack-1.0.5.tar.gz", hash = "sha256:c075544284eadc5cddc70f4757331d99dcbc16b2bbd4849d15f8aae4cf36d31c"}, + {file = "msgpack-1.0.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f4321692e7f299277e55f322329b2c972d93bb612d85f3fda8741bec5c6285ce"}, + {file = "msgpack-1.0.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1f0e36a5fa7a182cde391a128a64f437657d2b9371dfa42eda3436245adccbf5"}, + {file = "msgpack-1.0.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b5c8dd9a386a66e50bd7fa22b7a49fb8ead2b3574d6bd69eb1caced6caea0803"}, + {file = "msgpack-1.0.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9f85200ea102276afdd3749ca94747f057bbb868d1c52921ee2446730b508d0f"}, + {file = "msgpack-1.0.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7a006c300e82402c0c8f1ded11352a3ba2a61b87e7abb3054c845af2ca8d553c"}, + {file = "msgpack-1.0.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:33bbf47ea5a6ff20c23426106e81863cdbb5402de1825493026ce615039cc99d"}, + {file = "msgpack-1.0.6-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:04450e4b5e1e662e7c86b6aafb7c230af9334fd0becf5e6b80459a507884241c"}, + {file = "msgpack-1.0.6-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:b06a5095a79384760625b5de3f83f40b3053a385fb893be8a106fbbd84c14980"}, + {file = "msgpack-1.0.6-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3910211b0ab20be3a38e0bb944ed45bd4265d8d9f11a3d1674b95b298e08dd5c"}, + {file = "msgpack-1.0.6-cp310-cp310-win32.whl", hash = "sha256:1dc67b40fe81217b308ab12651adba05e7300b3a2ccf84d6b35a878e308dd8d4"}, + {file = "msgpack-1.0.6-cp310-cp310-win_amd64.whl", hash = "sha256:885de1ed5ea01c1bfe0a34c901152a264c3c1f8f1d382042b92ea354bd14bb0e"}, + {file = "msgpack-1.0.6-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:099c3d8a027367e1a6fc55d15336f04ff65c60c4f737b5739f7db4525c65fe9e"}, + {file = "msgpack-1.0.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:9b88dc97ba86c96b964c3745a445d9a65f76fe21955a953064fe04adb63e9367"}, + {file = "msgpack-1.0.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:00ce5f827d4f26fc094043e6f08b6069c1b148efa2631c47615ae14fb6cafc89"}, + {file = "msgpack-1.0.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bd6af61388be65a8701f5787362cb54adae20007e0cc67ca9221a4b95115583b"}, + {file = "msgpack-1.0.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:652e4b7497825b0af6259e2c54700e6dc33d2fc4ed92b8839435090d4c9cc911"}, + {file = "msgpack-1.0.6-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5b08676a17e3f791daad34d5fcb18479e9c85e7200d5a17cbe8de798643a7e37"}, + {file = "msgpack-1.0.6-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:229ccb6713c8b941eaa5cf13dc7478eba117f21513b5893c35e44483e2f0c9c8"}, + {file = "msgpack-1.0.6-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:95ade0bd4cf69e04e8b8f8ec2d197d9c9c4a9b6902e048dc7456bf6d82e12a80"}, + {file = "msgpack-1.0.6-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b16344032a27b2ccfd341f89dadf3e4ef6407d91e4b93563c14644a8abb3ad7"}, + {file = "msgpack-1.0.6-cp311-cp311-win32.whl", hash = "sha256:55bb4a1bf94e39447bc08238a2fb8a767460388a8192f67c103442eb36920887"}, + {file = "msgpack-1.0.6-cp311-cp311-win_amd64.whl", hash = "sha256:ae97504958d0bc58c1152045c170815d5c4f8af906561ce044b6358b43d0c97e"}, + {file = "msgpack-1.0.6-cp312-cp312-macosx_10_9_universal2.whl", hash = 
"sha256:7ecf431786019a7bfedc28281531d706627f603e3691d64eccdbce3ecd353823"}, + {file = "msgpack-1.0.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:a635aecf1047255576dbb0927cbf9a7aa4a68e9d54110cc3c926652d18f144e0"}, + {file = "msgpack-1.0.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:102cfb54eaefa73e8ca1e784b9352c623524185c98e057e519545131a56fb0af"}, + {file = "msgpack-1.0.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5c5e05e4f5756758c58a8088aa10dc70d851c89f842b611fdccfc0581c1846bc"}, + {file = "msgpack-1.0.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:68569509dd015fcdd1e6b2b3ccc8c51fd27d9a97f461ccc909270e220ee09685"}, + {file = "msgpack-1.0.6-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bf652839d16de91fe1cfb253e0a88db9a548796939533894e07f45d4bdf90a5f"}, + {file = "msgpack-1.0.6-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:14db7e1b7a7ed362b2f94897bf2486c899c8bb50f6e34b2db92fe534cdab306f"}, + {file = "msgpack-1.0.6-cp312-cp312-musllinux_1_1_i686.whl", hash = "sha256:159cfec18a6e125dd4723e2b1de6f202b34b87c850fb9d509acfd054c01135e9"}, + {file = "msgpack-1.0.6-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:6a01a072b2219b65a6ff74df208f20b2cac9401c60adb676ee34e53b4c651077"}, + {file = "msgpack-1.0.6-cp312-cp312-win32.whl", hash = "sha256:e36560d001d4ba469d469b02037f2dd404421fd72277d9474efe9f03f83fced5"}, + {file = "msgpack-1.0.6-cp312-cp312-win_amd64.whl", hash = "sha256:5e7fae9ca93258a956551708cf60dc6c8145574e32ce8c8c4d894e63bcb04341"}, + {file = "msgpack-1.0.6-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:40b801b768f5a765e33c68f30665d3c6ee1c8623a2d2bb78e6e59f2db4e4ceb7"}, + {file = "msgpack-1.0.6-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:da057d3652e698b00746e47f06dbb513314f847421e857e32e1dc61c46f6c052"}, + {file = "msgpack-1.0.6-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:f75114c05ec56566da6b55122791cf5bb53d5aada96a98c016d6231e03132f76"}, + {file = "msgpack-1.0.6-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:61213482b5a387ead9e250e9e3cb290292feca39dc83b41c3b1b7b8ffc8d8ecb"}, + {file = "msgpack-1.0.6-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bae6c561f11b444b258b1b4be2bdd1e1cf93cd1d80766b7e869a79db4543a8a8"}, + {file = "msgpack-1.0.6-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:619a63753ba9e792fe3c6c0fc2b9ee2cfbd92153dd91bee029a89a71eb2942cd"}, + {file = "msgpack-1.0.6-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:70843788c85ca385846a2d2f836efebe7bb2687ca0734648bf5c9dc6c55602d2"}, + {file = "msgpack-1.0.6-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:fb4571efe86545b772a4630fee578c213c91cbcfd20347806e47fd4e782a18fe"}, + {file = "msgpack-1.0.6-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:bbb4448a05d261fae423d5c0b0974ad899f60825bc77eabad5a0c518e78448c2"}, + {file = "msgpack-1.0.6-cp38-cp38-win32.whl", hash = "sha256:5cd67674db3c73026e0a2c729b909780e88bd9cbc8184256f9567640a5d299a8"}, + {file = "msgpack-1.0.6-cp38-cp38-win_amd64.whl", hash = "sha256:a1cf98afa7ad5e7012454ca3fde254499a13f9d92fd50cb46118118a249a1355"}, + {file = "msgpack-1.0.6-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:d6d25b8a5c70e2334ed61a8da4c11cd9b97c6fbd980c406033f06e4463fda006"}, + {file = "msgpack-1.0.6-cp39-cp39-macosx_10_9_x86_64.whl", hash = 
"sha256:88cdb1da7fdb121dbb3116910722f5acab4d6e8bfcacab8fafe27e2e7744dc6a"}, + {file = "msgpack-1.0.6-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3b5658b1f9e486a2eec4c0c688f213a90085b9cf2fec76ef08f98fdf6c62f4b9"}, + {file = "msgpack-1.0.6-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76820f2ece3b0a7c948bbb6a599020e29574626d23a649476def023cbb026787"}, + {file = "msgpack-1.0.6-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9c780d992f5d734432726b92a0c87bf1857c3d85082a8dea29cbf56e44a132b3"}, + {file = "msgpack-1.0.6-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e0ed35d6d6122d0baa9a1b59ebca4ee302139f4cfb57dab85e4c73ab793ae7ed"}, + {file = "msgpack-1.0.6-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:32c0aff31f33033f4961abc01f78497e5e07bac02a508632aef394b384d27428"}, + {file = "msgpack-1.0.6-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:35ad5aed9b52217d4cea739d0ea3a492a18dd86fecb4b132668a69f27fb0363b"}, + {file = "msgpack-1.0.6-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:47275ff73005a3e5e146e50baa2378e1730cba6e292f0222bc496a8e4c4adfc8"}, + {file = "msgpack-1.0.6-cp39-cp39-win32.whl", hash = "sha256:7baf16fd8908a025c4a8d7b699103e72d41f967e2aee5a2065432bcdbd9fd06e"}, + {file = "msgpack-1.0.6-cp39-cp39-win_amd64.whl", hash = "sha256:fc97aa4b4fb928ff4d3b74da7c30b360d0cb3ede49a5a6e1fd9705f49aea1deb"}, + {file = "msgpack-1.0.6.tar.gz", hash = "sha256:25d3746da40f3c8c59c3b1d001e49fd2aa17904438f980d9a391370366df001e"}, ] [[package]] @@ -1941,6 +1874,20 @@ files = [ {file = "multidict-6.0.4.tar.gz", hash = "sha256:3666906492efb76453c0e7b97f2cf459b0682e7402c0489a95484965dbc1da49"}, ] +[[package]] +name = "mypy-boto3-glue" +version = "1.28.36" +description = "Type annotations for boto3.Glue 1.28.36 service generated with mypy-boto3-builder 7.18.0" +optional = true +python-versions = ">=3.7" +files = [ + {file = "mypy-boto3-glue-1.28.36.tar.gz", hash = "sha256:161771252bb6a220a0bfd8e6ad71da8548599c611f95fe8a94846f4a3386d2ae"}, + {file = "mypy_boto3_glue-1.28.36-py3-none-any.whl", hash = "sha256:73bc14616ac65a5c02adea5efba7bbbcf8207cd0c0e3237c13d351ebc916338d"}, +] + +[package.dependencies] +typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.12\""} + [[package]] name = "nodeenv" version = "1.8.0" @@ -2026,6 +1973,47 @@ files = [ {file = "numpy-1.25.2.tar.gz", hash = "sha256:fd608e19c8d7c55021dffd43bfe5492fab8cc105cc8986f813f8c3c048b38760"}, ] +[[package]] +name = "numpy" +version = "1.26.0" +description = "Fundamental package for array computing in Python" +optional = true +python-versions = "<3.13,>=3.9" +files = [ + {file = "numpy-1.26.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f8db2f125746e44dce707dd44d4f4efeea8d7e2b43aace3f8d1f235cfa2733dd"}, + {file = "numpy-1.26.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0621f7daf973d34d18b4e4bafb210bbaf1ef5e0100b5fa750bd9cde84c7ac292"}, + {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:51be5f8c349fdd1a5568e72713a21f518e7d6707bcf8503b528b88d33b57dc68"}, + {file = "numpy-1.26.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:767254ad364991ccfc4d81b8152912e53e103ec192d1bb4ea6b1f5a7117040be"}, + {file = "numpy-1.26.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:436c8e9a4bdeeee84e3e59614d38c3dbd3235838a877af8c211cfcac8a80b8d3"}, + {file = "numpy-1.26.0-cp310-cp310-win32.whl", hash = 
"sha256:c2e698cb0c6dda9372ea98a0344245ee65bdc1c9dd939cceed6bb91256837896"}, + {file = "numpy-1.26.0-cp310-cp310-win_amd64.whl", hash = "sha256:09aaee96c2cbdea95de76ecb8a586cb687d281c881f5f17bfc0fb7f5890f6b91"}, + {file = "numpy-1.26.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:637c58b468a69869258b8ae26f4a4c6ff8abffd4a8334c830ffb63e0feefe99a"}, + {file = "numpy-1.26.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:306545e234503a24fe9ae95ebf84d25cba1fdc27db971aa2d9f1ab6bba19a9dd"}, + {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8c6adc33561bd1d46f81131d5352348350fc23df4d742bb246cdfca606ea1208"}, + {file = "numpy-1.26.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e062aa24638bb5018b7841977c360d2f5917268d125c833a686b7cbabbec496c"}, + {file = "numpy-1.26.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:546b7dd7e22f3c6861463bebb000646fa730e55df5ee4a0224408b5694cc6148"}, + {file = "numpy-1.26.0-cp311-cp311-win32.whl", hash = "sha256:c0b45c8b65b79337dee5134d038346d30e109e9e2e9d43464a2970e5c0e93229"}, + {file = "numpy-1.26.0-cp311-cp311-win_amd64.whl", hash = "sha256:eae430ecf5794cb7ae7fa3808740b015aa80747e5266153128ef055975a72b99"}, + {file = "numpy-1.26.0-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:166b36197e9debc4e384e9c652ba60c0bacc216d0fc89e78f973a9760b503388"}, + {file = "numpy-1.26.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f042f66d0b4ae6d48e70e28d487376204d3cbf43b84c03bac57e28dac6151581"}, + {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e5e18e5b14a7560d8acf1c596688f4dfd19b4f2945b245a71e5af4ddb7422feb"}, + {file = "numpy-1.26.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f6bad22a791226d0a5c7c27a80a20e11cfe09ad5ef9084d4d3fc4a299cca505"}, + {file = "numpy-1.26.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:4acc65dd65da28060e206c8f27a573455ed724e6179941edb19f97e58161bb69"}, + {file = "numpy-1.26.0-cp312-cp312-win32.whl", hash = "sha256:bb0d9a1aaf5f1cb7967320e80690a1d7ff69f1d47ebc5a9bea013e3a21faec95"}, + {file = "numpy-1.26.0-cp312-cp312-win_amd64.whl", hash = "sha256:ee84ca3c58fe48b8ddafdeb1db87388dce2c3c3f701bf447b05e4cfcc3679112"}, + {file = "numpy-1.26.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a873a8180479bc829313e8d9798d5234dfacfc2e8a7ac188418189bb8eafbd2"}, + {file = "numpy-1.26.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:914b28d3215e0c721dc75db3ad6d62f51f630cb0c277e6b3bcb39519bed10bd8"}, + {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c78a22e95182fb2e7874712433eaa610478a3caf86f28c621708d35fa4fd6e7f"}, + {file = "numpy-1.26.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86f737708b366c36b76e953c46ba5827d8c27b7a8c9d0f471810728e5a2fe57c"}, + {file = "numpy-1.26.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:b44e6a09afc12952a7d2a58ca0a2429ee0d49a4f89d83a0a11052da696440e49"}, + {file = "numpy-1.26.0-cp39-cp39-win32.whl", hash = "sha256:5671338034b820c8d58c81ad1dafc0ed5a00771a82fccc71d6438df00302094b"}, + {file = "numpy-1.26.0-cp39-cp39-win_amd64.whl", hash = "sha256:020cdbee66ed46b671429c7265cf00d8ac91c046901c55684954c3958525dab2"}, + {file = "numpy-1.26.0-pp39-pypy39_pp73-macosx_10_9_x86_64.whl", hash = "sha256:0792824ce2f7ea0c82ed2e4fecc29bb86bee0567a080dacaf2e0a01fe7654369"}, + {file = "numpy-1.26.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:7d484292eaeb3e84a51432a94f53578689ffdea3f90e10c8b203a99be5af57d8"}, + {file = "numpy-1.26.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:186ba67fad3c60dbe8a3abff3b67a91351100f2661c8e2a80364ae6279720299"}, + {file = "numpy-1.26.0.tar.gz", hash = "sha256:f93fc78fe8bf15afe2b8d6b6499f1c73953169fad1e9a8dd086cdff3190e7fdf"}, +] + [[package]] name = "oauthlib" version = "3.2.2" @@ -2090,8 +2078,8 @@ files = [ [package.dependencies] numpy = [ {version = ">=1.20.3", markers = "python_version < \"3.10\""}, - {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, + {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2163,14 +2151,13 @@ testing = ["pytest", "pytest-benchmark"] [[package]] name = "portalocker" -version = "2.7.0" +version = "2.8.2" description = "Wraps the portalocker recipe for easy usage" optional = true -python-versions = ">=3.5" +python-versions = ">=3.8" files = [ - {file = "portalocker-2.7.0-py2.py3-none-any.whl", hash = "sha256:a07c5b4f3985c3cf4798369631fb7011adb498e2a46d8440efc75a8f29a0f983"}, - {file = "portalocker-2.7.0-py3-none-any.whl", hash = "sha256:769c394f934f3459fab9f0989c17aa8158334f1898b9bb8407432bdc3f44be6d"}, - {file = "portalocker-2.7.0.tar.gz", hash = "sha256:032e81d534a88ec1736d03f780ba073f047a06c478b06e2937486f334e955c51"}, + {file = "portalocker-2.8.2-py3-none-any.whl", hash = "sha256:cfb86acc09b9aa7c3b43594e19be1345b9d16af3feb08bf92f23d4dce513a28e"}, + {file = "portalocker-2.8.2.tar.gz", hash = "sha256:2b035aa7828e46c58e9b31390ee1f169b98e1066ab10b9a6a861fe7e25ee4f33"}, ] [package.dependencies] @@ -2179,7 +2166,7 @@ pywin32 = {version = ">=226", markers = "platform_system == \"Windows\""} [package.extras] docs = ["sphinx (>=1.7.1)"] redis = ["redis"] -tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)"] +tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "pytest-timeout (>=2.1.0)", "redis", "sphinx (>=6.0.0)", "types-redis"] [[package]] name = "pre-commit" @@ -2201,24 +2188,24 @@ virtualenv = ">=20.10.0" [[package]] name = "protobuf" -version = "4.24.2" +version = "4.24.3" description = "" optional = true python-versions = ">=3.7" files = [ - {file = "protobuf-4.24.2-cp310-abi3-win32.whl", hash = "sha256:58e12d2c1aa428ece2281cef09bbaa6938b083bcda606db3da4e02e991a0d924"}, - {file = "protobuf-4.24.2-cp310-abi3-win_amd64.whl", hash = "sha256:77700b55ba41144fc64828e02afb41901b42497b8217b558e4a001f18a85f2e3"}, - {file = "protobuf-4.24.2-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:237b9a50bd3b7307d0d834c1b0eb1a6cd47d3f4c2da840802cd03ea288ae8880"}, - {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:25ae91d21e3ce8d874211110c2f7edd6384816fb44e06b2867afe35139e1fd1c"}, - {file = "protobuf-4.24.2-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:c00c3c7eb9ad3833806e21e86dca448f46035242a680f81c3fe068ff65e79c74"}, - {file = "protobuf-4.24.2-cp37-cp37m-win32.whl", hash = "sha256:4e69965e7e54de4db989289a9b971a099e626f6167a9351e9d112221fc691bc1"}, - {file = "protobuf-4.24.2-cp37-cp37m-win_amd64.whl", hash = "sha256:c5cdd486af081bf752225b26809d2d0a85e575b80a84cde5172a05bbb1990099"}, - {file = "protobuf-4.24.2-cp38-cp38-win32.whl", hash = "sha256:6bd26c1fa9038b26c5c044ee77e0ecb18463e957fefbaeb81a3feb419313a54e"}, - {file = 
"protobuf-4.24.2-cp38-cp38-win_amd64.whl", hash = "sha256:bb7aa97c252279da65584af0456f802bd4b2de429eb945bbc9b3d61a42a8cd16"}, - {file = "protobuf-4.24.2-cp39-cp39-win32.whl", hash = "sha256:2b23bd6e06445699b12f525f3e92a916f2dcf45ffba441026357dea7fa46f42b"}, - {file = "protobuf-4.24.2-cp39-cp39-win_amd64.whl", hash = "sha256:839952e759fc40b5d46be319a265cf94920174d88de31657d5622b5d8d6be5cd"}, - {file = "protobuf-4.24.2-py3-none-any.whl", hash = "sha256:3b7b170d3491ceed33f723bbf2d5a260f8a4e23843799a3906f16ef736ef251e"}, - {file = "protobuf-4.24.2.tar.gz", hash = "sha256:7fda70797ddec31ddfa3576cbdcc3ddbb6b3078b737a1a87ab9136af0570cd6e"}, + {file = "protobuf-4.24.3-cp310-abi3-win32.whl", hash = "sha256:20651f11b6adc70c0f29efbe8f4a94a74caf61b6200472a9aea6e19898f9fcf4"}, + {file = "protobuf-4.24.3-cp310-abi3-win_amd64.whl", hash = "sha256:3d42e9e4796a811478c783ef63dc85b5a104b44aaaca85d4864d5b886e4b05e3"}, + {file = "protobuf-4.24.3-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:6e514e8af0045be2b56e56ae1bb14f43ce7ffa0f68b1c793670ccbe2c4fc7d2b"}, + {file = "protobuf-4.24.3-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:ba53c2f04798a326774f0e53b9c759eaef4f6a568ea7072ec6629851c8435959"}, + {file = "protobuf-4.24.3-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:f6ccbcf027761a2978c1406070c3788f6de4a4b2cc20800cc03d52df716ad675"}, + {file = "protobuf-4.24.3-cp37-cp37m-win32.whl", hash = "sha256:1b182c7181a2891e8f7f3a1b5242e4ec54d1f42582485a896e4de81aa17540c2"}, + {file = "protobuf-4.24.3-cp37-cp37m-win_amd64.whl", hash = "sha256:b0271a701e6782880d65a308ba42bc43874dabd1a0a0f41f72d2dac3b57f8e76"}, + {file = "protobuf-4.24.3-cp38-cp38-win32.whl", hash = "sha256:e29d79c913f17a60cf17c626f1041e5288e9885c8579832580209de8b75f2a52"}, + {file = "protobuf-4.24.3-cp38-cp38-win_amd64.whl", hash = "sha256:067f750169bc644da2e1ef18c785e85071b7c296f14ac53e0900e605da588719"}, + {file = "protobuf-4.24.3-cp39-cp39-win32.whl", hash = "sha256:2da777d34b4f4f7613cdf85c70eb9a90b1fbef9d36ae4a0ccfe014b0b07906f1"}, + {file = "protobuf-4.24.3-cp39-cp39-win_amd64.whl", hash = "sha256:f631bb982c5478e0c1c70eab383af74a84be66945ebf5dd6b06fc90079668d0b"}, + {file = "protobuf-4.24.3-py3-none-any.whl", hash = "sha256:f6f8dc65625dadaad0c8545319c2e2f0424fede988368893ca3844261342c11a"}, + {file = "protobuf-4.24.3.tar.gz", hash = "sha256:12e9ad2ec079b833176d2921be2cb24281fa591f0b119b208b788adc48c2561d"}, ] [[package]] @@ -2698,13 +2685,13 @@ files = [ [[package]] name = "pytz" -version = "2023.3" +version = "2023.3.post1" description = "World timezone definitions, modern and historical" optional = true python-versions = "*" files = [ - {file = "pytz-2023.3-py2.py3-none-any.whl", hash = "sha256:a151b3abb88eda1d4e34a9814df37de2a80e301e68ba0fd856fb9b46bfbbbffb"}, - {file = "pytz-2023.3.tar.gz", hash = "sha256:1d8ce29db189191fb55338ee6d0387d82ab59f3d00eac103412d64e0ebd0c588"}, + {file = "pytz-2023.3.post1-py2.py3-none-any.whl", hash = "sha256:ce42d816b81b68506614c11e8937d3aa9e41007ceb50bfdcb0749b921bf646c7"}, + {file = "pytz-2023.3.post1.tar.gz", hash = "sha256:7b4fddbeb94a1eba4b557da24f19fdf9db575192544270a9101d8509f9f43d7b"}, ] [[package]] @@ -2791,35 +2778,35 @@ files = [ [[package]] name = "ray" -version = "2.6.3" +version = "2.7.0" description = "Ray provides a simple, universal API for building distributed applications." 
optional = true python-versions = "*" files = [ - {file = "ray-2.6.3-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:8a3cde58dba07da7a62e1f804b3dae5b29de3be052e02e4559bff7e7cb4d4a3b"}, - {file = "ray-2.6.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:81e2ee7252e2fbfb05a24124774a8de563daa261200a08d9cbc6b499f7262af1"}, - {file = "ray-2.6.3-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:485e4cd46a569416a14a72c06fe7901b0e3902f3023100b375c477975824e707"}, - {file = "ray-2.6.3-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:abc6a537454506a5fa87137de058d12aeea38da7077aae6f0ebf6199e5f5b2a1"}, - {file = "ray-2.6.3-cp310-cp310-win_amd64.whl", hash = "sha256:787ec7f43f5b3ed85728cf4878bdfed0a334d9108b6af75ef3fe5c8d44a7f74d"}, - {file = "ray-2.6.3-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:bdeacaafcbb97e5f1c3c3349e7fcc0c40f691cea2bf057027c5491ea1ac929b0"}, - {file = "ray-2.6.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b4600c93e2e94b6ca75ef4b4cb92d7f98d4be5484273d6fbac4218fb82cf96f"}, - {file = "ray-2.6.3-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:0a5870f9a16cb94080d770f83326d7e2163d88d75be240273cef4b932a071bb2"}, - {file = "ray-2.6.3-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:1a8de31a9a4049134cf7e97b725a4078c958a964d091cb3e812e31eddd013bd7"}, - {file = "ray-2.6.3-cp311-cp311-win_amd64.whl", hash = "sha256:56b920a1814decdd20a754b7c5048770684d6d3d242c83aa99da5d3e8c339f13"}, - {file = "ray-2.6.3-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:b358fd112876c3a249fd8cffbf20b26622817c78b2ade0a725a7036c693f8d70"}, - {file = "ray-2.6.3-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:467b9aa63f09d20e3985457816d703fe27ea388cdcaa88ff5eff222f8074a05c"}, - {file = "ray-2.6.3-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:dff21468d621c8dac95b3df320e6c6121f6618f6827243fd75a057c8815c2498"}, - {file = "ray-2.6.3-cp37-cp37m-win_amd64.whl", hash = "sha256:e0f8eaf4c4592335722dad474685c2ffc98207b997e47a24b297a60db389a4cb"}, - {file = "ray-2.6.3-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:31f1dd05130e712b9b64ccad9e6eaa82c715bb25a0a45ffd48ebf4953f6fe347"}, - {file = "ray-2.6.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:90b780e131f891185f9de2b9c08d1f2d729e5755c7389a1ddaa6f796fae0d787"}, - {file = "ray-2.6.3-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:3e5a4bbc29268a64bd2a8d48ed60f32a5bcce285a2a4f4339174947733449e37"}, - {file = "ray-2.6.3-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:a182a80aebf863b5d4e875bed0a80e83200e84f4f63c4126cef87cc01e43f067"}, - {file = "ray-2.6.3-cp38-cp38-win_amd64.whl", hash = "sha256:015a2aa30aba0719d20cdf8fa32c689b68016678cb20f46bd1df8b227c938b84"}, - {file = "ray-2.6.3-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:3ccf809e5948333c1c8c81694514b5900259e79cbdc8bddd3680695820cafcf2"}, - {file = "ray-2.6.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a4ef2f52319286720be7f3bfe6043e9fd0b8cb7826cb2ffc90c23c1c42427464"}, - {file = "ray-2.6.3-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:5923849ec0854ab3e5ca8873d47ed7e11074e1213a3c40f8864c9500de034313"}, - {file = "ray-2.6.3-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:18d033cc468e5171d9995476c33f99a5b79f091c34265c7e9f3d8b1c9042437e"}, - {file = "ray-2.6.3-cp39-cp39-win_amd64.whl", hash = "sha256:bca66c8e8163f06dc5443623e7b221660529a39574a589ba9257f2188ea8bf6b"}, + {file = "ray-2.7.0-cp310-cp310-macosx_10_15_x86_64.whl", hash = "sha256:bc911655908b61b2e9f59b8df158fcc62cd32080c468b484b539ebf0a4111d04"}, + {file = 
"ray-2.7.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:0ee8c14e1521559cd5802bfad3f0aba4a77afdfba57dd446162a7449c6e8ff68"}, + {file = "ray-2.7.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:ebde44af7d479ede21d1c2e68b5ccd8264e18df6e4f3c216d9e99c31e819bde6"}, + {file = "ray-2.7.0-cp310-cp310-manylinux2014_x86_64.whl", hash = "sha256:b83621f5d2d4079e6ae624c3bf30046a4fefa0ea7ea5e4a4dfe4b50c580b3768"}, + {file = "ray-2.7.0-cp310-cp310-win_amd64.whl", hash = "sha256:8e1b06abba6e227b8dde1ad861c587fb2608a6970d270e4755cd24a6f37ed565"}, + {file = "ray-2.7.0-cp311-cp311-macosx_10_15_x86_64.whl", hash = "sha256:5442d48719f033831a324f05b332d6e7181970d721e9504be2091cc9d9735394"}, + {file = "ray-2.7.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ca8225878cce7b9e2d0ca9668d9370893a7cee35629d11a3889a1b66a0007218"}, + {file = "ray-2.7.0-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:a3f59dbb0780f9fa11f5bf96bef853b4cb95245456d4400e1c7bf2e514d12ab2"}, + {file = "ray-2.7.0-cp311-cp311-manylinux2014_x86_64.whl", hash = "sha256:8384b3f30bc1446ef810e9e894afa03238c5ac40d3c40c0740d82f347112015d"}, + {file = "ray-2.7.0-cp311-cp311-win_amd64.whl", hash = "sha256:8d4530e7024375505552dabd3f4441fc9ac7a5562365a81ba9afa14185433879"}, + {file = "ray-2.7.0-cp37-cp37m-macosx_10_15_x86_64.whl", hash = "sha256:c491b8051eef82b77d136c48a23d16485c0e54233303ccf68e9fe69a06c517e6"}, + {file = "ray-2.7.0-cp37-cp37m-manylinux2014_aarch64.whl", hash = "sha256:1684c434886cb7b263cdf98ed39d75dec343e949f7b14f3385d83bfe70ee8c80"}, + {file = "ray-2.7.0-cp37-cp37m-manylinux2014_x86_64.whl", hash = "sha256:856a9ae164b9b0aeaad54f3e78986eb19900ed3c74e26f51b02a7d8826c97e59"}, + {file = "ray-2.7.0-cp37-cp37m-win_amd64.whl", hash = "sha256:34925a90b6239de42592bb4524dcbdc59a9c65f1f74ad4d9f97f636bd59c73d7"}, + {file = "ray-2.7.0-cp38-cp38-macosx_10_15_x86_64.whl", hash = "sha256:60db240f37d80a80492e09a8d1e29b79d034431c6fcb651401e9e2d24d850793"}, + {file = "ray-2.7.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:835155fdeb8698eae426f3d9416e6b8165197fe5c1c74e1b02a429fc7f4ddcd2"}, + {file = "ray-2.7.0-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:442b7568946081d38c8addbc528e7b09fc1ee25453b4800c86b7e5ba4bce9dd3"}, + {file = "ray-2.7.0-cp38-cp38-manylinux2014_x86_64.whl", hash = "sha256:3825292b777b423e2cd34bf66e8e1e7701b04c6a5308f9f291ad5929b289dc47"}, + {file = "ray-2.7.0-cp38-cp38-win_amd64.whl", hash = "sha256:ce700322662946ad5c62a39b78e81feebcb855d378c49f5df6477c22f0ac1e5a"}, + {file = "ray-2.7.0-cp39-cp39-macosx_10_15_x86_64.whl", hash = "sha256:327c23aac5dd26ee4abe6cee70320322d63fdf97c6028fbb9555724b46a8f3e3"}, + {file = "ray-2.7.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a09021d45312ab7a44109b251984718b65fbff77df0b55e30e651193cdf42bff"}, + {file = "ray-2.7.0-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:f05fcb609962d14f4d23cc88a9d07cafa7077ce3c5d5ee99cd08a19067b7eecf"}, + {file = "ray-2.7.0-cp39-cp39-manylinux2014_x86_64.whl", hash = "sha256:0e0f7dbeb4444940c72b64fdecd6f331593466914b2dffeed03ce97225acec14"}, + {file = "ray-2.7.0-cp39-cp39-win_amd64.whl", hash = "sha256:29a0866316756ae18e232dd074adbf408dcdabe95d135a9a96b9a8c24393c983"}, ] [package.dependencies] @@ -2827,10 +2814,6 @@ aiosignal = "*" click = ">=7.0" filelock = "*" frozenlist = "*" -grpcio = [ - {version = ">=1.32.0", markers = "python_version < \"3.10\""}, - {version = ">=1.42.0", markers = "python_version >= \"3.10\""}, -] jsonschema = "*" msgpack = ">=1.0.0,<2.0.0" numpy = [ @@ -2843,17 +2826,18 @@ pyyaml = "*" requests = "*" 
[package.extras] -air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)"] -all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "gymnasium (==0.26.3)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml", "ray-cpp (==2.6.3)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)"] +air = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "numpy (>=1.20)", "opencensus", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "requests", "smart-open", "starlette", "tensorboardX (>=1.9)", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] +all = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "dm-tree", "fastapi", "fsspec", "gpustat (>=1.0.0)", "grpcio (!=1.56.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "gymnasium (==0.28.1)", "lz4", "numpy (>=1.20)", "opencensus", "opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk", "pandas", "pandas (>=1.3)", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pyarrow (>=6.0.1)", "pydantic (<2)", "pyyaml", "ray-cpp (==2.7.0)", "requests", "rich", "scikit-image", "scipy", "smart-open", "starlette", "tensorboardX (>=1.9)", "typer", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] client = ["grpcio (!=1.56.0)"] -cpp = ["ray-cpp (==2.6.3)"] +cpp = ["ray-cpp (==2.7.0)"] data = ["fsspec", "numpy (>=1.20)", "pandas (>=1.3)", "pyarrow (>=6.0.1)"] -default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"] +default = ["aiohttp (>=3.7)", "aiohttp-cors", "colorful", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "virtualenv (>=20.0.24,<20.21.1)"] observability = ["opentelemetry-api", "opentelemetry-exporter-otlp", "opentelemetry-sdk"] -rllib = ["dm-tree", "gymnasium (==0.26.3)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"] -serve = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)"] -train = ["pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] -tune = ["pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] +rllib = ["dm-tree", "fsspec", "gymnasium (==0.28.1)", "lz4", "pandas", "pyarrow (>=6.0.1)", "pyyaml", "requests", "rich", "scikit-image", "scipy", "tensorboardX (>=1.9)", "typer"] +serve = ["aiohttp 
(>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] +serve-grpc = ["aiohttp (>=3.7)", "aiohttp-cors", "aiorwlock", "colorful", "fastapi", "gpustat (>=1.0.0)", "grpcio (>=1.32.0)", "grpcio (>=1.42.0)", "opencensus", "prometheus-client (>=0.7.1)", "py-spy (>=0.2.0)", "pydantic (<2)", "requests", "smart-open", "starlette", "uvicorn", "virtualenv (>=20.0.24,<20.21.1)", "watchfiles"] +train = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] +tune = ["fsspec", "pandas", "pyarrow (>=6.0.1)", "requests", "tensorboardX (>=1.9)"] [[package]] name = "referencing" @@ -2950,13 +2934,13 @@ tests = ["coverage (>=6.0.0)", "flake8", "mypy", "pytest (>=7.0.0)", "pytest-asy [[package]] name = "rich" -version = "13.5.2" +version = "13.5.3" description = "Render rich text, tables, progress bars, syntax highlighting, markdown and more to the terminal" optional = false python-versions = ">=3.7.0" files = [ - {file = "rich-13.5.2-py3-none-any.whl", hash = "sha256:146a90b3b6b47cac4a73c12866a499e9817426423f57c5a66949c086191a8808"}, - {file = "rich-13.5.2.tar.gz", hash = "sha256:fb9d6c0a0f643c99eed3875b5377a184132ba9be4d61516a55273d3554d75a39"}, + {file = "rich-13.5.3-py3-none-any.whl", hash = "sha256:9257b468badc3d347e146a4faa268ff229039d4c2d176ab0cffb4c4fbc73d5d9"}, + {file = "rich-13.5.3.tar.gz", hash = "sha256:87b43e0543149efa1253f485cd845bb7ee54df16c9617b8a893650ab84b4acb6"}, ] [package.dependencies] @@ -2969,108 +2953,108 @@ jupyter = ["ipywidgets (>=7.5.1,<9)"] [[package]] name = "rpds-py" -version = "0.10.2" +version = "0.10.3" description = "Python bindings to Rust's persistent data structures (rpds)" optional = true python-versions = ">=3.8" files = [ - {file = "rpds_py-0.10.2-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:9f00d54b18dd837f1431d66b076737deb7c29ce3ebb8412ceaf44d5e1954ac0c"}, - {file = "rpds_py-0.10.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f4d561f4728f825e3b793a53064b606ca0b6fc264f67d09e54af452aafc5b82"}, - {file = "rpds_py-0.10.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:013d6c784150d10236a74b4094a79d96a256b814457e388fc5a4ba9efe24c402"}, - {file = "rpds_py-0.10.2-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bd1142d22fdb183a0fff66d79134bf644401437fed874f81066d314c67ee193c"}, - {file = "rpds_py-0.10.2-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a0536ed2b9297c75104e1a3da330828ba1b2639fa53b38d396f98bf7e3c68df"}, - {file = "rpds_py-0.10.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:41bd430b7b63aa802c02964e331ac0b177148fef5f807d2c90d05ce71a52b4d4"}, - {file = "rpds_py-0.10.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4e8474f7233fe1949ce4e03bea698a600c2d5d6b51dab6d6e6336dbe69acf23e"}, - {file = "rpds_py-0.10.2-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d9d7efaad48b859053b90dedd69bc92f2095084251e732e4c57ac9726bcb1e64"}, - {file = "rpds_py-0.10.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5612b0b1de8d5114520094bd5fc3d04eb8af6f3e10d48ef05b7c8e77c1fd9545"}, - {file = "rpds_py-0.10.2-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5d5eaf988951f6ecb6854ca3300b87123599c711183c83da7ce39717a7cbdbce"}, - {file 
= "rpds_py-0.10.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:75c8766734ac0053e1d683567e65e85306c4ec62631b0591caeb287ac8f72e08"}, - {file = "rpds_py-0.10.2-cp310-none-win32.whl", hash = "sha256:8de9b88f0cbac73cfed34220d13c57849e62a7099a714b929142425e926d223a"}, - {file = "rpds_py-0.10.2-cp310-none-win_amd64.whl", hash = "sha256:2275f1a022e2383da5d2d101fe11ccdcbae799148c4b83260a4b9309fa3e1fc2"}, - {file = "rpds_py-0.10.2-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:dd91a7d7a9ce7f4983097c91ce211f3e5569cc21caa16f2692298a07e396f82b"}, - {file = "rpds_py-0.10.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:e82b4a70cc67094f3f3fd77579702f48fcf1de7bdc67d79b8f1e24d089a6162c"}, - {file = "rpds_py-0.10.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e281b71922208e00886e4b7ffbfcf27874486364f177418ab676f102130e7ec9"}, - {file = "rpds_py-0.10.2-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b3eb1a0d2b6d232d1bcdfc3fcc5f7b004ab3fbd9203011a3172f051d4527c0b6"}, - {file = "rpds_py-0.10.2-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:02945ae38fd78efc40900f509890de84cfd5ffe2cd2939eeb3a8800dc68b87cb"}, - {file = "rpds_py-0.10.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ccfb77f6dc8abffa6f1c7e3975ed9070a41ce5fcc11154d2bead8c1baa940f09"}, - {file = "rpds_py-0.10.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:af52078719209bef33e38131486fd784832dd8d1dc9b85f00a44f6e7437dd021"}, - {file = "rpds_py-0.10.2-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:56ba7c1100ed079527f2b995bf5486a2e557e6d5b733c52e8947476338815b69"}, - {file = "rpds_py-0.10.2-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:899b03a3be785a7e1ff84b237da71f0efa2f021512f147dd34ffdf7aa82cb678"}, - {file = "rpds_py-0.10.2-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:22e6de18f00583f06928cc8d0993104ecc62f7c6da6478db2255de89a30e45d1"}, - {file = "rpds_py-0.10.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:edd74b760a6bb950397e7a7bd2f38e6700f6525062650b1d77c6d851b82f02c2"}, - {file = "rpds_py-0.10.2-cp311-none-win32.whl", hash = "sha256:18909093944727e068ebfc92e2e6ed1c4fa44135507c1c0555213ce211c53214"}, - {file = "rpds_py-0.10.2-cp311-none-win_amd64.whl", hash = "sha256:9568764e72d85cf7855ca78b48e07ed1be47bf230e2cea8dabda3c95f660b0ff"}, - {file = "rpds_py-0.10.2-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:0fc625059b83695fbb4fc8b7a8b66fa94ff9c7b78c84fb9986cd53ff88a28d80"}, - {file = "rpds_py-0.10.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:c86231c66e4f422e7c13ea6200bb4048b3016c8bfd11b4fd0dabd04d2c8e3501"}, - {file = "rpds_py-0.10.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:56777c57246e048908b550af9b81b0ec9cf804fd47cb7502ccd93238bd6025c2"}, - {file = "rpds_py-0.10.2-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a4cb372e22e9c879bd9a9cc9b20b7c1fbf30a605ac953da45ecec05d8a6e1c77"}, - {file = "rpds_py-0.10.2-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aa3b3a43dabc4cc57a7800f526cbe03f71c69121e21b863fdf497b59b462b163"}, - {file = "rpds_py-0.10.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:59d222086daa55421d599609b32d0ebe544e57654c4a0a1490c54a7ebaa67561"}, - {file = "rpds_py-0.10.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:529aab727f54a937085184e7436e1d0e19975cf10115eda12d37a683e4ee5342"}, - {file = "rpds_py-0.10.2-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:43e9b1531d6a898bdf086acb75c41265c7ec4331267d7619148d407efc72bd24"}, - {file = "rpds_py-0.10.2-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:c2772bb95062e3f9774140205cd65d8997e39620715486cf5f843cf4ad8f744c"}, - {file = "rpds_py-0.10.2-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:ba1b28e44f611f3f2b436bd8290050a61db4b59a8e24be4465f44897936b3824"}, - {file = "rpds_py-0.10.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5aba767e64b494483ad60c4873bec78d16205a21f8247c99749bd990d9c846c2"}, - {file = "rpds_py-0.10.2-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:e1954f4b239d1a92081647eecfd51cbfd08ea16eb743b8af1cd0113258feea14"}, - {file = "rpds_py-0.10.2-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:de4a2fd524993578fe093044f291b4b24aab134390030b3b9b5f87fd41ab7e75"}, - {file = "rpds_py-0.10.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e69737bd56006a86fd5a78b2b85447580a6138c930a75eb9ef39fe03d90782b1"}, - {file = "rpds_py-0.10.2-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f40abbcc0a7d9a8a80870af839d317e6932533f98682aabd977add6c53beeb23"}, - {file = "rpds_py-0.10.2-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29ec8507664f94cc08457d98cfc41c3cdbddfa8952438e644177a29b04937876"}, - {file = "rpds_py-0.10.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bcde80aefe7054fad6277762fb7e9d35c72ea479a485ae1bb14629c640987b30"}, - {file = "rpds_py-0.10.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a65de5c02884760a14a58304fb6303f9ddfc582e630f385daea871e1bdb18686"}, - {file = "rpds_py-0.10.2-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e92e5817eb6bfed23aa5e45bfe30647b83602bdd6f9e25d63524d4e6258458b0"}, - {file = "rpds_py-0.10.2-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:2c8fc6c841ada60a86d29c9ebe2e8757c47eda6553f3596c560e59ca6e9b6fa1"}, - {file = "rpds_py-0.10.2-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:8557c807388e6617161fe51b1a4747ea8d1133f2d2ad8e79583439abebe58fbd"}, - {file = "rpds_py-0.10.2-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:00e97d43a36811b78fa9ad9d3329bf34f76a31e891a7031a2ac01450c9b168ab"}, - {file = "rpds_py-0.10.2-cp38-none-win32.whl", hash = "sha256:1ed3d5385d14be894e12a9033be989e012214a9811e7194849c94032ad69682a"}, - {file = "rpds_py-0.10.2-cp38-none-win_amd64.whl", hash = "sha256:02b4a2e28eb24dac4ef43dda4f6a6f7766e355179b143f7d0c76a1c5488a307b"}, - {file = "rpds_py-0.10.2-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:2a55631b93e47956fbc97d69ba2054a8c6a4016f9a3064ec4e031f5f1030cb90"}, - {file = "rpds_py-0.10.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2ffbf1b38c88d0466de542e91b08225d51782282512f8e2b11715126c41fda48"}, - {file = "rpds_py-0.10.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:213f9ef5c02ec2f883c1075d25a873149daadbaea50d18d622e9db55ec9849c2"}, - {file = "rpds_py-0.10.2-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b00150a9a3fd0a8efaa90bc2696c105b04039d50763dd1c95a34c88c5966cb57"}, - {file = "rpds_py-0.10.2-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ab0f7aabdbce4a202e013083eeab71afdb85efa405dc4a06fea98cde81204675"}, - {file = "rpds_py-0.10.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", 
hash = "sha256:2cd0c9fb5d40887500b4ed818770c68ab4fa6e0395d286f9704be6751b1b7d98"}, - {file = "rpds_py-0.10.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b8578fc6c8bdd0201327503720fa581000b4bd3934abbf07e2628d1ad3de157d"}, - {file = "rpds_py-0.10.2-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2d27d08056fcd61ff47a0cd8407eff4d3e816c82cb6b9c6f0ce9a0ad49225f81"}, - {file = "rpds_py-0.10.2-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:c8f6526df47953b07c45b95c4d1da6b9a0861c0e5da0271db96bb1d807825412"}, - {file = "rpds_py-0.10.2-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:177c033e467a66a054dd3a9534167234a3d0b2e41445807b13b626e01da25d92"}, - {file = "rpds_py-0.10.2-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:9c74cbee9e532dc34371127f7686d6953e5153a1f22beab7f953d95ee4a0fe09"}, - {file = "rpds_py-0.10.2-cp39-none-win32.whl", hash = "sha256:05a1382905026bdd560f806c8c7c16e0f3e3fb359ba8868203ca6e5799884968"}, - {file = "rpds_py-0.10.2-cp39-none-win_amd64.whl", hash = "sha256:3fd503c27e7b7034128e30847ecdb4bff4ca5e60f29ad022a9f66ae8940d54ac"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:4a96147791e49e84207dd1530109aa0e9eeaf1c8b7a59f150047fc0fcdf9bb64"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:203eb1532d51591d32e8dfafd60b5d31347ea7278c8da02b4b550287f6abe28b"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a2f416cdfe92f5fbb77177f5f3f7830059d1582db05f2c7119bf80069d1ab69b"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:b2660000e1a113869c86eb5cc07f3343467490f3cd9d0299f81da9ddae7137b7"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:1adb04e4b4e41bf30aaa77eeb169c1b9ba9e5010e2e6ce8d6c17e1446edc9b68"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2bca97521ee786087f0c5ef318fef3eef0266a9c3deff88205523cf353af7394"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4969592e3cdeefa4cbb15a26cec102cbd4a1d6e5b695fac9fa026e19741138c8"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:df61f818edf7c8626bfa392f825860fb670b5f8336e238eb0ec7e2a5689cdded"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:b589d93a60e78fe55d5bc76ee8c2bf945dbdbb7cd16044c53e0307604e448de1"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:73da69e1f612c3e682e34dcb971272d90d6f27b2c99acff444ca455a89978574"}, - {file = "rpds_py-0.10.2-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:89438e8885a186c69fe31f7ef98bb2bf29688c466c3caf9060f404c0be89ae80"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:c4ecc4e9a5d73a816cae36ee6b5d8b7a0c72013cae1e101406e832887c3dc2d8"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:907b214da5d2fcff0b6ddb83de1333890ca92abaf4bbf8d9c61dc1b95c87fd6e"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:bb44644371eaa29a3aba7b69b1862d0d56f073bb7585baa32e4271a71a91ee82"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:80c3cf46511653f94dfe07c7c79ab105c4164d6e1dfcb35b7214fb9af53eaef4"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:eaba0613c759ebf95988a84f766ca6b7432d55ce399194f95dde588ad1be0878"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0527c97dcd8bb983822ee31d3760187083fd3ba18ac4dd22cf5347c89d5628f4"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9cdfd649011ce2d90cb0dd304c5aba1190fac0c266d19a9e2b96b81cfd150a09"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:75eea40355a8690459c7291ce6c8ce39c27bd223675c7da6619f510c728feb97"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:4f1b804cfad04f862d6a84af9d1ad941b06f671878f0f7ecad6c92007d423de6"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:bf77f9017fcfa1232f98598a637406e6c33982ccba8a5922339575c3e2b90ea5"}, - {file = "rpds_py-0.10.2-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:46c4c550bf59ce05d6bff2c98053822549aaf9fbaf81103edea325e03350bca1"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:46af4a742b90c7460e94214f923452c2c1d050a9da1d2b8d4c70cbc045e692b7"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:2a86d246a160d98d820ee7d02dc18c923c228de095be362e57b9fd8970b2c4a1"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ae141c9017f8f473a6ee07a9425da021816a9f8c0683c2e5442f0ccf56b0fc62"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e1147bc3d0dd1e549d991110d0a09557ec9f925dbc1ca62871fcdab2ec9d716b"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fce7a8ee8d0f682c953c0188735d823f0fcb62779bf92cd6ba473a8e730e26ad"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4c7f9d70f99e1fbcbf57c75328b80e1c0a7f6cad43e75efa90a97221be5efe15"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b309908b6ff5ffbf6394818cb73b5a2a74073acee2c57fe8719046389aeff0d"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:3ff1f585a0fdc1415bd733b804f33d386064a308672249b14828130dd43e7c31"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:0188b580c490bccb031e9b67e9e8c695a3c44ac5e06218b152361eca847317c3"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:abe081453166e206e3a8c6d8ace57214c17b6d9477d7601ac14a365344dbc1f4"}, - {file = "rpds_py-0.10.2-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9118de88c16947eaf5b92f749e65b0501ea69e7c2be7bd6aefc12551622360e1"}, - {file = "rpds_py-0.10.2.tar.gz", hash = "sha256:289073f68452b96e70990085324be7223944c7409973d13ddfe0eea1c1b5663b"}, + {file = "rpds_py-0.10.3-cp310-cp310-macosx_10_7_x86_64.whl", hash = "sha256:485747ee62da83366a44fbba963c5fe017860ad408ccd6cd99aa66ea80d32b2e"}, + {file = "rpds_py-0.10.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c55f9821f88e8bee4b7a72c82cfb5ecd22b6aad04033334f33c329b29bfa4da0"}, + {file = "rpds_py-0.10.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:d3b52a67ac66a3a64a7e710ba629f62d1e26ca0504c29ee8cbd99b97df7079a8"}, + {file = "rpds_py-0.10.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:3aed39db2f0ace76faa94f465d4234aac72e2f32b009f15da6492a561b3bbebd"}, + {file = "rpds_py-0.10.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:271c360fdc464fe6a75f13ea0c08ddf71a321f4c55fc20a3fe62ea3ef09df7d9"}, + {file = "rpds_py-0.10.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ef5fddfb264e89c435be4adb3953cef5d2936fdeb4463b4161a6ba2f22e7b740"}, + {file = "rpds_py-0.10.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a771417c9c06c56c9d53d11a5b084d1de75de82978e23c544270ab25e7c066ff"}, + {file = "rpds_py-0.10.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:52b5cbc0469328e58180021138207e6ec91d7ca2e037d3549cc9e34e2187330a"}, + {file = "rpds_py-0.10.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6ac3fefb0d168c7c6cab24fdfc80ec62cd2b4dfd9e65b84bdceb1cb01d385c33"}, + {file = "rpds_py-0.10.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:8d54bbdf5d56e2c8cf81a1857250f3ea132de77af543d0ba5dce667183b61fec"}, + {file = "rpds_py-0.10.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cd2163f42868865597d89399a01aa33b7594ce8e2c4a28503127c81a2f17784e"}, + {file = "rpds_py-0.10.3-cp310-none-win32.whl", hash = "sha256:ea93163472db26ac6043e8f7f93a05d9b59e0505c760da2a3cd22c7dd7111391"}, + {file = "rpds_py-0.10.3-cp310-none-win_amd64.whl", hash = "sha256:7cd020b1fb41e3ab7716d4d2c3972d4588fdfbab9bfbbb64acc7078eccef8860"}, + {file = "rpds_py-0.10.3-cp311-cp311-macosx_10_7_x86_64.whl", hash = "sha256:1d9b5ee46dcb498fa3e46d4dfabcb531e1f2e76b477e0d99ef114f17bbd38453"}, + {file = "rpds_py-0.10.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:563646d74a4b4456d0cf3b714ca522e725243c603e8254ad85c3b59b7c0c4bf0"}, + {file = "rpds_py-0.10.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e626b864725680cd3904414d72e7b0bd81c0e5b2b53a5b30b4273034253bb41f"}, + {file = "rpds_py-0.10.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:485301ee56ce87a51ccb182a4b180d852c5cb2b3cb3a82f7d4714b4141119d8c"}, + {file = "rpds_py-0.10.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:42f712b4668831c0cd85e0a5b5a308700fe068e37dcd24c0062904c4e372b093"}, + {file = "rpds_py-0.10.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6c9141af27a4e5819d74d67d227d5047a20fa3c7d4d9df43037a955b4c748ec5"}, + {file = "rpds_py-0.10.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ef750a20de1b65657a1425f77c525b0183eac63fe7b8f5ac0dd16f3668d3e64f"}, + {file = "rpds_py-0.10.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:e1a0ffc39f51aa5f5c22114a8f1906b3c17eba68c5babb86c5f77d8b1bba14d1"}, + {file = "rpds_py-0.10.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:f4c179a7aeae10ddf44c6bac87938134c1379c49c884529f090f9bf05566c836"}, + {file = "rpds_py-0.10.3-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:176287bb998fd1e9846a9b666e240e58f8d3373e3bf87e7642f15af5405187b8"}, + {file = "rpds_py-0.10.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:6446002739ca29249f0beaaf067fcbc2b5aab4bc7ee8fb941bd194947ce19aff"}, + {file = "rpds_py-0.10.3-cp311-none-win32.whl", hash = "sha256:c7aed97f2e676561416c927b063802c8a6285e9b55e1b83213dfd99a8f4f9e48"}, + {file = 
"rpds_py-0.10.3-cp311-none-win_amd64.whl", hash = "sha256:8bd01ff4032abaed03f2db702fa9a61078bee37add0bd884a6190b05e63b028c"}, + {file = "rpds_py-0.10.3-cp312-cp312-macosx_10_7_x86_64.whl", hash = "sha256:4cf0855a842c5b5c391dd32ca273b09e86abf8367572073bd1edfc52bc44446b"}, + {file = "rpds_py-0.10.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:69b857a7d8bd4f5d6e0db4086da8c46309a26e8cefdfc778c0c5cc17d4b11e08"}, + {file = "rpds_py-0.10.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:975382d9aa90dc59253d6a83a5ca72e07f4ada3ae3d6c0575ced513db322b8ec"}, + {file = "rpds_py-0.10.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:35fbd23c1c8732cde7a94abe7fb071ec173c2f58c0bd0d7e5b669fdfc80a2c7b"}, + {file = "rpds_py-0.10.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:106af1653007cc569d5fbb5f08c6648a49fe4de74c2df814e234e282ebc06957"}, + {file = "rpds_py-0.10.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:ce5e7504db95b76fc89055c7f41e367eaadef5b1d059e27e1d6eabf2b55ca314"}, + {file = "rpds_py-0.10.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5aca759ada6b1967fcfd4336dcf460d02a8a23e6abe06e90ea7881e5c22c4de6"}, + {file = "rpds_py-0.10.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b5d4bdd697195f3876d134101c40c7d06d46c6ab25159ed5cbd44105c715278a"}, + {file = "rpds_py-0.10.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:a657250807b6efd19b28f5922520ae002a54cb43c2401e6f3d0230c352564d25"}, + {file = "rpds_py-0.10.3-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:177c9dd834cdf4dc39c27436ade6fdf9fe81484758885f2d616d5d03c0a83bd2"}, + {file = "rpds_py-0.10.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:e22491d25f97199fc3581ad8dd8ce198d8c8fdb8dae80dea3512e1ce6d5fa99f"}, + {file = "rpds_py-0.10.3-cp38-cp38-macosx_10_7_x86_64.whl", hash = "sha256:2f3e1867dd574014253b4b8f01ba443b9c914e61d45f3674e452a915d6e929a3"}, + {file = "rpds_py-0.10.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:c22211c165166de6683de8136229721f3d5c8606cc2c3d1562da9a3a5058049c"}, + {file = "rpds_py-0.10.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:40bc802a696887b14c002edd43c18082cb7b6f9ee8b838239b03b56574d97f71"}, + {file = "rpds_py-0.10.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5e271dd97c7bb8eefda5cca38cd0b0373a1fea50f71e8071376b46968582af9b"}, + {file = "rpds_py-0.10.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:95cde244e7195b2c07ec9b73fa4c5026d4a27233451485caa1cd0c1b55f26dbd"}, + {file = "rpds_py-0.10.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:08a80cf4884920863623a9ee9a285ee04cef57ebedc1cc87b3e3e0f24c8acfe5"}, + {file = "rpds_py-0.10.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:763ad59e105fca09705d9f9b29ecffb95ecdc3b0363be3bb56081b2c6de7977a"}, + {file = "rpds_py-0.10.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:187700668c018a7e76e89424b7c1042f317c8df9161f00c0c903c82b0a8cac5c"}, + {file = "rpds_py-0.10.3-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:5267cfda873ad62591b9332fd9472d2409f7cf02a34a9c9cb367e2c0255994bf"}, + {file = "rpds_py-0.10.3-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:2ed83d53a8c5902ec48b90b2ac045e28e1698c0bea9441af9409fc844dc79496"}, + {file = "rpds_py-0.10.3-cp38-cp38-musllinux_1_2_x86_64.whl", hash = 
"sha256:255f1a10ae39b52122cce26ce0781f7a616f502feecce9e616976f6a87992d6b"}, + {file = "rpds_py-0.10.3-cp38-none-win32.whl", hash = "sha256:a019a344312d0b1f429c00d49c3be62fa273d4a1094e1b224f403716b6d03be1"}, + {file = "rpds_py-0.10.3-cp38-none-win_amd64.whl", hash = "sha256:efb9ece97e696bb56e31166a9dd7919f8f0c6b31967b454718c6509f29ef6fee"}, + {file = "rpds_py-0.10.3-cp39-cp39-macosx_10_7_x86_64.whl", hash = "sha256:570cc326e78ff23dec7f41487aa9c3dffd02e5ee9ab43a8f6ccc3df8f9327623"}, + {file = "rpds_py-0.10.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:cff7351c251c7546407827b6a37bcef6416304fc54d12d44dbfecbb717064717"}, + {file = "rpds_py-0.10.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:177914f81f66c86c012311f8c7f46887ec375cfcfd2a2f28233a3053ac93a569"}, + {file = "rpds_py-0.10.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:448a66b8266de0b581246ca7cd6a73b8d98d15100fb7165974535fa3b577340e"}, + {file = "rpds_py-0.10.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3bbac1953c17252f9cc675bb19372444aadf0179b5df575ac4b56faaec9f6294"}, + {file = "rpds_py-0.10.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9dd9d9d9e898b9d30683bdd2b6c1849449158647d1049a125879cb397ee9cd12"}, + {file = "rpds_py-0.10.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8c71ea77536149e36c4c784f6d420ffd20bea041e3ba21ed021cb40ce58e2c9"}, + {file = "rpds_py-0.10.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16a472300bc6c83fe4c2072cc22b3972f90d718d56f241adabc7ae509f53f154"}, + {file = "rpds_py-0.10.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:b9255e7165083de7c1d605e818025e8860636348f34a79d84ec533546064f07e"}, + {file = "rpds_py-0.10.3-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:53d7a3cd46cdc1689296348cb05ffd4f4280035770aee0c8ead3bbd4d6529acc"}, + {file = "rpds_py-0.10.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:22da15b902f9f8e267020d1c8bcfc4831ca646fecb60254f7bc71763569f56b1"}, + {file = "rpds_py-0.10.3-cp39-none-win32.whl", hash = "sha256:850c272e0e0d1a5c5d73b1b7871b0a7c2446b304cec55ccdb3eaac0d792bb065"}, + {file = "rpds_py-0.10.3-cp39-none-win_amd64.whl", hash = "sha256:de61e424062173b4f70eec07e12469edde7e17fa180019a2a0d75c13a5c5dc57"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-macosx_10_7_x86_64.whl", hash = "sha256:af247fd4f12cca4129c1b82090244ea5a9d5bb089e9a82feb5a2f7c6a9fe181d"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:3ad59efe24a4d54c2742929001f2d02803aafc15d6d781c21379e3f7f66ec842"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:642ed0a209ced4be3a46f8cb094f2d76f1f479e2a1ceca6de6346a096cd3409d"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:37d0c59548ae56fae01c14998918d04ee0d5d3277363c10208eef8c4e2b68ed6"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:aad6ed9e70ddfb34d849b761fb243be58c735be6a9265b9060d6ddb77751e3e8"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:8f94fdd756ba1f79f988855d948ae0bad9ddf44df296770d9a58c774cfbcca72"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:77076bdc8776a2b029e1e6ffbe6d7056e35f56f5e80d9dc0bad26ad4a024a762"}, + {file = 
"rpds_py-0.10.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:87d9b206b1bd7a0523375dc2020a6ce88bca5330682ae2fe25e86fd5d45cea9c"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:8efaeb08ede95066da3a3e3c420fcc0a21693fcd0c4396d0585b019613d28515"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:a4d9bfda3f84fc563868fe25ca160c8ff0e69bc4443c5647f960d59400ce6557"}, + {file = "rpds_py-0.10.3-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:d27aa6bbc1f33be920bb7adbb95581452cdf23005d5611b29a12bb6a3468cc95"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-macosx_10_7_x86_64.whl", hash = "sha256:ed8313809571a5463fd7db43aaca68ecb43ca7a58f5b23b6e6c6c5d02bdc7882"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:e10e6a1ed2b8661201e79dff5531f8ad4cdd83548a0f81c95cf79b3184b20c33"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:015de2ce2af1586ff5dc873e804434185199a15f7d96920ce67e50604592cae9"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ae87137951bb3dc08c7d8bfb8988d8c119f3230731b08a71146e84aaa919a7a9"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0bb4f48bd0dd18eebe826395e6a48b7331291078a879295bae4e5d053be50d4c"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:09362f86ec201288d5687d1dc476b07bf39c08478cde837cb710b302864e7ec9"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:821392559d37759caa67d622d0d2994c7a3f2fb29274948ac799d496d92bca73"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:7170cbde4070dc3c77dec82abf86f3b210633d4f89550fa0ad2d4b549a05572a"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:5de11c041486681ce854c814844f4ce3282b6ea1656faae19208ebe09d31c5b8"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-musllinux_1_2_i686.whl", hash = "sha256:4ed172d0c79f156c1b954e99c03bc2e3033c17efce8dd1a7c781bc4d5793dfac"}, + {file = "rpds_py-0.10.3-pp38-pypy38_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:11fdd1192240dda8d6c5d18a06146e9045cb7e3ba7c06de6973000ff035df7c6"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-macosx_10_7_x86_64.whl", hash = "sha256:f602881d80ee4228a2355c68da6b296a296cd22bbb91e5418d54577bbf17fa7c"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:691d50c99a937709ac4c4cd570d959a006bd6a6d970a484c84cc99543d4a5bbb"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:24cd91a03543a0f8d09cb18d1cb27df80a84b5553d2bd94cba5979ef6af5c6e7"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:fc2200e79d75b5238c8d69f6a30f8284290c777039d331e7340b6c17cad24a5a"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:ea65b59882d5fa8c74a23f8960db579e5e341534934f43f3b18ec1839b893e41"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:829e91f3a8574888b73e7a3feb3b1af698e717513597e23136ff4eba0bc8387a"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:eab75a8569a095f2ad470b342f2751d9902f7944704f0571c8af46bede438475"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:061c3ff1f51ecec256e916cf71cc01f9975af8fb3af9b94d3c0cc8702cfea637"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:39d05e65f23a0fe897b6ac395f2a8d48c56ac0f583f5d663e0afec1da89b95da"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-musllinux_1_2_i686.whl", hash = "sha256:4eca20917a06d2fca7628ef3c8b94a8c358f6b43f1a621c9815243462dcccf97"}, + {file = "rpds_py-0.10.3-pp39-pypy39_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e8d0f0eca087630d58b8c662085529781fd5dc80f0a54eda42d5c9029f812599"}, + {file = "rpds_py-0.10.3.tar.gz", hash = "sha256:fcc1ebb7561a3e24a6588f7c6ded15d80aec22c66a070c757559b57b17ffd1cb"}, ] [[package]] @@ -3089,19 +3073,19 @@ pyasn1 = ">=0.1.3" [[package]] name = "s3fs" -version = "2023.9.0" +version = "2023.9.1" description = "Convenient Filesystem interface over S3" optional = true python-versions = ">= 3.8" files = [ - {file = "s3fs-2023.9.0-py3-none-any.whl", hash = "sha256:98ad2b221514490f0fe49b730ccf4f0362031ee8ede6d5392cdd3977ca313b1a"}, - {file = "s3fs-2023.9.0.tar.gz", hash = "sha256:35057d4d59722cab9fe91c9a30147e3e5bddfc55ec14fde8776c512179c823dd"}, + {file = "s3fs-2023.9.1-py3-none-any.whl", hash = "sha256:3bd1f9f33e4ad090d150301c3b386061cb7085fc8bda3a9ec9198dccca765d6c"}, + {file = "s3fs-2023.9.1.tar.gz", hash = "sha256:42e1821ed94c1607c848853d1d715ebcd25c13380b6f510c2cb498c7e5b3e674"}, ] [package.dependencies] aiobotocore = ">=2.5.4,<2.6.0" aiohttp = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1" -fsspec = "2023.9.0" +fsspec = "2023.9.1" [package.extras] awscli = ["aiobotocore[awscli] (>=2.5.4,<2.6.0)"] @@ -3126,19 +3110,19 @@ crt = ["botocore[crt] (>=1.20.29,<2.0a.0)"] [[package]] name = "setuptools" -version = "68.1.2" +version = "68.2.2" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = "setuptools-68.1.2-py3-none-any.whl", hash = "sha256:3d8083eed2d13afc9426f227b24fd1659489ec107c0e86cec2ffdde5c92e790b"}, - {file = "setuptools-68.1.2.tar.gz", hash = "sha256:3d4dfa6d95f1b101d695a6160a7626e15583af71a5f52176efa5d39a054d475d"}, + {file = "setuptools-68.2.2-py3-none-any.whl", hash = "sha256:b454a35605876da60632df1a60f736524eb73cc47bbc9f3f1ef1b644de74fd2a"}, + {file = "setuptools-68.2.2.tar.gz", hash = "sha256:4ac1475276d2f1c48684874089fefcd83bd7162ddaafb81fac866ba0db282a87"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5,<=7.1.2)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (==0.8.3)", "sphinx-reredirects", "sphinxcontrib-towncrier"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-hoverxref (<2)", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier"] testing = ["build[virtualenv]", "filelock (>=3.4.0)", "flake8-2020", "ini2toml[lite] (>=0.9)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pip (>=19.1)", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-perf", "pytest-ruff", 
"pytest-timeout", "pytest-xdist", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel"] -testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] +testing-integration = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "packaging (>=23.1)", "pytest", "pytest-enabler", "pytest-xdist", "tomli", "virtualenv (>=13.0.0)", "wheel"] [[package]] name = "six" @@ -3164,52 +3148,52 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.20" +version = "2.0.21" description = "Database Abstraction Library" optional = true python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:759b51346aa388c2e606ee206c0bc6f15a5299f6174d1e10cadbe4530d3c7a98"}, - {file = "SQLAlchemy-2.0.20-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1506e988ebeaaf316f183da601f24eedd7452e163010ea63dbe52dc91c7fc70e"}, - {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5768c268df78bacbde166b48be788b83dddaa2a5974b8810af422ddfe68a9bc8"}, - {file = "SQLAlchemy-2.0.20-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a3f0dd6d15b6dc8b28a838a5c48ced7455c3e1fb47b89da9c79cc2090b072a50"}, - {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:243d0fb261f80a26774829bc2cee71df3222587ac789b7eaf6555c5b15651eed"}, - {file = "SQLAlchemy-2.0.20-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:6eb6d77c31e1bf4268b4d61b549c341cbff9842f8e115ba6904249c20cb78a61"}, - {file = "SQLAlchemy-2.0.20-cp310-cp310-win32.whl", hash = "sha256:bcb04441f370cbe6e37c2b8d79e4af9e4789f626c595899d94abebe8b38f9a4d"}, - {file = "SQLAlchemy-2.0.20-cp310-cp310-win_amd64.whl", hash = "sha256:d32b5ffef6c5bcb452723a496bad2d4c52b346240c59b3e6dba279f6dcc06c14"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:dd81466bdbc82b060c3c110b2937ab65ace41dfa7b18681fdfad2f37f27acdd7"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6fe7d61dc71119e21ddb0094ee994418c12f68c61b3d263ebaae50ea8399c4d4"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e4e571af672e1bb710b3cc1a9794b55bce1eae5aed41a608c0401885e3491179"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3364b7066b3c7f4437dd345d47271f1251e0cfb0aba67e785343cdbdb0fff08c"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:1be86ccea0c965a1e8cd6ccf6884b924c319fcc85765f16c69f1ae7148eba64b"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1d35d49a972649b5080557c603110620a86aa11db350d7a7cb0f0a3f611948a0"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-win32.whl", hash = "sha256:27d554ef5d12501898d88d255c54eef8414576f34672e02fe96d75908993cf53"}, - {file = "SQLAlchemy-2.0.20-cp311-cp311-win_amd64.whl", hash = "sha256:411e7f140200c02c4b953b3dbd08351c9f9818d2bd591b56d0fa0716bd014f1e"}, - {file = "SQLAlchemy-2.0.20-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:3c6aceebbc47db04f2d779db03afeaa2c73ea3f8dcd3987eb9efdb987ffa09a3"}, - {file = "SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7d3f175410a6db0ad96b10bfbb0a5530ecd4fcf1e2b5d83d968dd64791f810ed"}, - {file = 
"SQLAlchemy-2.0.20-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ea8186be85da6587456c9ddc7bf480ebad1a0e6dcbad3967c4821233a4d4df57"}, - {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:c3d99ba99007dab8233f635c32b5cd24fb1df8d64e17bc7df136cedbea427897"}, - {file = "SQLAlchemy-2.0.20-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:76fdfc0f6f5341987474ff48e7a66c3cd2b8a71ddda01fa82fedb180b961630a"}, - {file = "SQLAlchemy-2.0.20-cp37-cp37m-win32.whl", hash = "sha256:d3793dcf5bc4d74ae1e9db15121250c2da476e1af8e45a1d9a52b1513a393459"}, - {file = "SQLAlchemy-2.0.20-cp37-cp37m-win_amd64.whl", hash = "sha256:79fde625a0a55220d3624e64101ed68a059c1c1f126c74f08a42097a72ff66a9"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:599ccd23a7146e126be1c7632d1d47847fa9f333104d03325c4e15440fc7d927"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:1a58052b5a93425f656675673ef1f7e005a3b72e3f2c91b8acca1b27ccadf5f4"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79543f945be7a5ada9943d555cf9b1531cfea49241809dd1183701f94a748624"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63e73da7fb030ae0a46a9ffbeef7e892f5def4baf8064786d040d45c1d6d1dc5"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:3ce5e81b800a8afc870bb8e0a275d81957e16f8c4b62415a7b386f29a0cb9763"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:cb0d3e94c2a84215532d9bcf10229476ffd3b08f481c53754113b794afb62d14"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-win32.whl", hash = "sha256:8dd77fd6648b677d7742d2c3cc105a66e2681cc5e5fb247b88c7a7b78351cf74"}, - {file = "SQLAlchemy-2.0.20-cp38-cp38-win_amd64.whl", hash = "sha256:6f8a934f9dfdf762c844e5164046a9cea25fabbc9ec865c023fe7f300f11ca4a"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:26a3399eaf65e9ab2690c07bd5cf898b639e76903e0abad096cd609233ce5208"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:4cde2e1096cbb3e62002efdb7050113aa5f01718035ba9f29f9d89c3758e7e4e"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d1b09ba72e4e6d341bb5bdd3564f1cea6095d4c3632e45dc69375a1dbe4e26ec"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1b74eeafaa11372627ce94e4dc88a6751b2b4d263015b3523e2b1e57291102f0"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:77d37c1b4e64c926fa3de23e8244b964aab92963d0f74d98cbc0783a9e04f501"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:eefebcc5c555803065128401a1e224a64607259b5eb907021bf9b175f315d2a6"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-win32.whl", hash = "sha256:3423dc2a3b94125094897118b52bdf4d37daf142cbcf26d48af284b763ab90e9"}, - {file = "SQLAlchemy-2.0.20-cp39-cp39-win_amd64.whl", hash = "sha256:5ed61e3463021763b853628aef8bc5d469fe12d95f82c74ef605049d810f3267"}, - {file = "SQLAlchemy-2.0.20-py3-none-any.whl", hash = "sha256:63a368231c53c93e2b67d0c5556a9836fdcd383f7e3026a39602aad775b14acf"}, - {file = "SQLAlchemy-2.0.20.tar.gz", hash = "sha256:ca8a5ff2aa7f3ade6c498aaafce25b1eaeabe4e42b73e25519183e4566a16fc6"}, + {file = "SQLAlchemy-2.0.21-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1e7dc99b23e33c71d720c4ae37ebb095bebebbd31a24b7d99dfc4753d2803ede"}, 
+ {file = "SQLAlchemy-2.0.21-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7f0c4ee579acfe6c994637527c386d1c22eb60bc1c1d36d940d8477e482095d4"}, + {file = "SQLAlchemy-2.0.21-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3f7d57a7e140efe69ce2d7b057c3f9a595f98d0bbdfc23fd055efdfbaa46e3a5"}, + {file = "SQLAlchemy-2.0.21-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7ca38746eac23dd7c20bec9278d2058c7ad662b2f1576e4c3dbfcd7c00cc48fa"}, + {file = "SQLAlchemy-2.0.21-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:3cf229704074bce31f7f47d12883afee3b0a02bb233a0ba45ddbfe542939cca4"}, + {file = "SQLAlchemy-2.0.21-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:fb87f763b5d04a82ae84ccff25554ffd903baafba6698e18ebaf32561f2fe4aa"}, + {file = "SQLAlchemy-2.0.21-cp310-cp310-win32.whl", hash = "sha256:89e274604abb1a7fd5c14867a412c9d49c08ccf6ce3e1e04fffc068b5b6499d4"}, + {file = "SQLAlchemy-2.0.21-cp310-cp310-win_amd64.whl", hash = "sha256:e36339a68126ffb708dc6d1948161cea2a9e85d7d7b0c54f6999853d70d44430"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:bf8eebccc66829010f06fbd2b80095d7872991bfe8415098b9fe47deaaa58063"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:b977bfce15afa53d9cf6a632482d7968477625f030d86a109f7bdfe8ce3c064a"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6ff3dc2f60dbf82c9e599c2915db1526d65415be323464f84de8db3e361ba5b9"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44ac5c89b6896f4740e7091f4a0ff2e62881da80c239dd9408f84f75a293dae9"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:87bf91ebf15258c4701d71dcdd9c4ba39521fb6a37379ea68088ce8cd869b446"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:b69f1f754d92eb1cc6b50938359dead36b96a1dcf11a8670bff65fd9b21a4b09"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-win32.whl", hash = "sha256:af520a730d523eab77d754f5cf44cc7dd7ad2d54907adeb3233177eeb22f271b"}, + {file = "SQLAlchemy-2.0.21-cp311-cp311-win_amd64.whl", hash = "sha256:141675dae56522126986fa4ca713739d00ed3a6f08f3c2eb92c39c6dfec463ce"}, + {file = "SQLAlchemy-2.0.21-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:7614f1eab4336df7dd6bee05bc974f2b02c38d3d0c78060c5faa4cd1ca2af3b8"}, + {file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d59cb9e20d79686aa473e0302e4a82882d7118744d30bb1dfb62d3c47141b3ec"}, + {file = "SQLAlchemy-2.0.21-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a95aa0672e3065d43c8aa80080cdd5cc40fe92dc873749e6c1cf23914c4b83af"}, + {file = "SQLAlchemy-2.0.21-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:8c323813963b2503e54d0944813cd479c10c636e3ee223bcbd7bd478bf53c178"}, + {file = "SQLAlchemy-2.0.21-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:419b1276b55925b5ac9b4c7044e999f1787c69761a3c9756dec6e5c225ceca01"}, + {file = "SQLAlchemy-2.0.21-cp37-cp37m-win32.whl", hash = "sha256:4615623a490e46be85fbaa6335f35cf80e61df0783240afe7d4f544778c315a9"}, + {file = "SQLAlchemy-2.0.21-cp37-cp37m-win_amd64.whl", hash = "sha256:cca720d05389ab1a5877ff05af96551e58ba65e8dc65582d849ac83ddde3e231"}, + {file = "SQLAlchemy-2.0.21-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:b4eae01faee9f2b17f08885e3f047153ae0416648f8e8c8bd9bc677c5ce64be9"}, + {file = 
"SQLAlchemy-2.0.21-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:3eb7c03fe1cd3255811cd4e74db1ab8dca22074d50cd8937edf4ef62d758cdf4"}, + {file = "SQLAlchemy-2.0.21-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2d494b6a2a2d05fb99f01b84cc9af9f5f93bf3e1e5dbdafe4bed0c2823584c1"}, + {file = "SQLAlchemy-2.0.21-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b19ae41ef26c01a987e49e37c77b9ad060c59f94d3b3efdfdbf4f3daaca7b5fe"}, + {file = "SQLAlchemy-2.0.21-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:fc6b15465fabccc94bf7e38777d665b6a4f95efd1725049d6184b3a39fd54880"}, + {file = "SQLAlchemy-2.0.21-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:014794b60d2021cc8ae0f91d4d0331fe92691ae5467a00841f7130fe877b678e"}, + {file = "SQLAlchemy-2.0.21-cp38-cp38-win32.whl", hash = "sha256:0268256a34806e5d1c8f7ee93277d7ea8cc8ae391f487213139018b6805aeaf6"}, + {file = "SQLAlchemy-2.0.21-cp38-cp38-win_amd64.whl", hash = "sha256:73c079e21d10ff2be54a4699f55865d4b275fd6c8bd5d90c5b1ef78ae0197301"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:785e2f2c1cb50d0a44e2cdeea5fd36b5bf2d79c481c10f3a88a8be4cfa2c4615"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:c111cd40910ffcb615b33605fc8f8e22146aeb7933d06569ac90f219818345ef"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c9cba4e7369de663611ce7460a34be48e999e0bbb1feb9130070f0685e9a6b66"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:50a69067af86ec7f11a8e50ba85544657b1477aabf64fa447fd3736b5a0a4f67"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:ccb99c3138c9bde118b51a289d90096a3791658da9aea1754667302ed6564f6e"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:513fd5b6513d37e985eb5b7ed89da5fd9e72354e3523980ef00d439bc549c9e9"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-win32.whl", hash = "sha256:f9fefd6298433b6e9188252f3bff53b9ff0443c8fde27298b8a2b19f6617eeb9"}, + {file = "SQLAlchemy-2.0.21-cp39-cp39-win_amd64.whl", hash = "sha256:2e617727fe4091cedb3e4409b39368f424934c7faa78171749f704b49b4bb4ce"}, + {file = "SQLAlchemy-2.0.21-py3-none-any.whl", hash = "sha256:ea7da25ee458d8f404b93eb073116156fd7d8c2a776d8311534851f28277b4ce"}, + {file = "SQLAlchemy-2.0.21.tar.gz", hash = "sha256:05b971ab1ac2994a14c56b35eaaa91f86ba080e9ad481b20d99d77f381bb6258"}, ] [package.dependencies] @@ -3334,13 +3318,13 @@ socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] name = "virtualenv" -version = "20.24.4" +version = "20.24.5" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.24.4-py3-none-any.whl", hash = "sha256:29c70bb9b88510f6414ac3e55c8b413a1f96239b6b789ca123437d5e892190cb"}, - {file = "virtualenv-20.24.4.tar.gz", hash = "sha256:772b05bfda7ed3b8ecd16021ca9716273ad9f4467c801f27e83ac73430246dca"}, + {file = "virtualenv-20.24.5-py3-none-any.whl", hash = "sha256:b80039f280f4919c77b30f1c23294ae357c4c8701042086e3fc005963e4e537b"}, + {file = "virtualenv-20.24.5.tar.gz", hash = "sha256:e8361967f6da6fbdf1426483bfe9fca8287c242ac0bc30429905721cefbff752"}, ] [package.dependencies] @@ -3553,17 +3537,17 @@ multidict = ">=4.0" [[package]] name = "zipp" -version = "3.16.2" +version = "3.17.0" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" files = [ - 
{file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, - {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, + {file = "zipp-3.17.0-py3-none-any.whl", hash = "sha256:0e923e726174922dce09c53c59ad483ff7bbb8e572e00c7f7c46b88556409f31"}, + {file = "zipp-3.17.0.tar.gz", hash = "sha256:84e64a1c28cf7e91ed2078bb8cc8c259cb19b76942096c8d7b84947690cabaf0"}, ] [package.extras] -docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] +docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (<7.2.5)", "sphinx (>=3.5)", "sphinx-lint"] testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [[package]] @@ -3629,7 +3613,7 @@ adlfs = ["adlfs"] duckdb = ["duckdb", "pyarrow"] dynamodb = ["boto3"] gcsfs = ["gcsfs"] -glue = ["boto3"] +glue = ["boto3", "mypy-boto3-glue"] hive = ["thrift"] pandas = ["pandas", "pyarrow"] pyarrow = ["pyarrow"] @@ -3642,4 +3626,4 @@ zstandard = ["zstandard"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "5c925b1cdd1b705341ef8da13b19674087e849194d28f5cf2bbc91ad9ed9b0e1" +content-hash = "f1b2829663670132eaf275b01f33627eff5ed42eeb6e8dcba73b7c4ad46e90ec" diff --git a/python/pyiceberg/catalog/glue.py b/python/pyiceberg/catalog/glue.py index 405df6720fd9..e0683632deec 100644 --- a/python/pyiceberg/catalog/glue.py +++ b/python/pyiceberg/catalog/glue.py @@ -18,14 +18,22 @@ from typing import ( Any, - Dict, List, Optional, Set, Union, + cast, ) import boto3 +from mypy_boto3_glue.client import GlueClient +from mypy_boto3_glue.type_defs import ( + DatabaseInputTypeDef, + DatabaseTypeDef, + StorageDescriptorTypeDef, + TableInputTypeDef, + TableTypeDef, +) from pyiceberg.catalog import ( EXTERNAL_TABLE, @@ -56,109 +64,97 @@ from pyiceberg.table.sorting import UNSORTED_SORT_ORDER, SortOrder from pyiceberg.typedef import EMPTY_DICT -BOTO_SESSION_CONFIG_KEYS = ["aws_access_key_id", "aws_secret_access_key", "aws_session_token", "region_name", "profile_name"] - -GLUE_CLIENT = "glue" - - -PROP_GLUE_TABLE = "Table" -PROP_GLUE_TABLE_TYPE = "TableType" -PROP_GLUE_TABLE_DESCRIPTION = "Description" -PROP_GLUE_TABLE_PARAMETERS = "Parameters" -PROP_GLUE_TABLE_DATABASE_NAME = "DatabaseName" -PROP_GLUE_TABLE_NAME = "Name" -PROP_GLUE_TABLE_OWNER = "Owner" -PROP_GLUE_TABLE_STORAGE_DESCRIPTOR = "StorageDescriptor" - -PROP_GLUE_TABLELIST = "TableList" - -PROP_GLUE_DATABASE = "Database" -PROP_GLUE_DATABASE_LIST = "DatabaseList" -PROP_GLUE_DATABASE_NAME = "Name" -PROP_GLUE_DATABASE_LOCATION = "LocationUri" -PROP_GLUE_DATABASE_DESCRIPTION = "Description" -PROP_GLUE_DATABASE_PARAMETERS = "Parameters" - -PROP_GLUE_NEXT_TOKEN = "NextToken" - -GLUE_DESCRIPTION_KEY = "comment" - def _construct_parameters(metadata_location: str) -> Properties: return {TABLE_TYPE: ICEBERG.upper(), METADATA_LOCATION: metadata_location} -def _construct_create_table_input(table_name: str, metadata_location: str, properties: Properties) -> Dict[str, Any]: - table_input = { - PROP_GLUE_TABLE_NAME: table_name, - PROP_GLUE_TABLE_TYPE: EXTERNAL_TABLE, - PROP_GLUE_TABLE_PARAMETERS: _construct_parameters(metadata_location), +def _construct_create_table_input(table_name: str, metadata_location: 
str, properties: Properties) -> TableInputTypeDef: + table_input: TableInputTypeDef = { + "Name": table_name, + "TableType": EXTERNAL_TABLE, + "Parameters": _construct_parameters(metadata_location), } - if table_description := properties.get(GLUE_DESCRIPTION_KEY): - table_input[PROP_GLUE_TABLE_DESCRIPTION] = table_description + if "Description" in properties: + table_input["Description"] = properties["Description"] return table_input -def _construct_rename_table_input(to_table_name: str, glue_table: Dict[str, Any]) -> Dict[str, Any]: - rename_table_input = {PROP_GLUE_TABLE_NAME: to_table_name} +def _construct_rename_table_input(to_table_name: str, glue_table: TableTypeDef) -> TableInputTypeDef: + rename_table_input: TableInputTypeDef = {"Name": to_table_name} # use the same Glue info to create the new table, pointing to the old metadata - if table_type := glue_table.get(PROP_GLUE_TABLE_TYPE): - rename_table_input[PROP_GLUE_TABLE_TYPE] = table_type - if table_parameters := glue_table.get(PROP_GLUE_TABLE_PARAMETERS): - rename_table_input[PROP_GLUE_TABLE_PARAMETERS] = table_parameters - if table_owner := glue_table.get(PROP_GLUE_TABLE_OWNER): - rename_table_input[PROP_GLUE_TABLE_OWNER] = table_owner - if table_storage_descriptor := glue_table.get(PROP_GLUE_TABLE_STORAGE_DESCRIPTOR): - rename_table_input[PROP_GLUE_TABLE_STORAGE_DESCRIPTOR] = table_storage_descriptor - if table_description := glue_table.get(PROP_GLUE_TABLE_DESCRIPTION): - rename_table_input[PROP_GLUE_TABLE_DESCRIPTION] = table_description + assert glue_table["TableType"] + rename_table_input["TableType"] = glue_table["TableType"] + if "Owner" in glue_table: + rename_table_input["Owner"] = glue_table["Owner"] + + if "Parameters" in glue_table: + rename_table_input["Parameters"] = glue_table["Parameters"] + + if "StorageDescriptor" in glue_table: + # It turns out the output of StorageDescriptor is not the same as the input type + # because the Column can have a different type, but for now it seems to work, so + # silence the type error. 
+ rename_table_input["StorageDescriptor"] = cast(StorageDescriptorTypeDef, glue_table["StorageDescriptor"]) + + if "Description" in glue_table: + rename_table_input["Description"] = glue_table["Description"] + return rename_table_input -def _construct_database_input(database_name: str, properties: Properties) -> Dict[str, Any]: - database_input: Dict[str, Any] = {PROP_GLUE_DATABASE_NAME: database_name} +def _construct_database_input(database_name: str, properties: Properties) -> DatabaseInputTypeDef: + database_input: DatabaseInputTypeDef = {"Name": database_name} parameters = {} for k, v in properties.items(): - if k == GLUE_DESCRIPTION_KEY: - database_input[PROP_GLUE_DATABASE_DESCRIPTION] = v + if k == "Description": + database_input["Description"] = v elif k == LOCATION: - database_input[PROP_GLUE_DATABASE_LOCATION] = v + database_input["LocationUri"] = v else: parameters[k] = v - database_input[PROP_GLUE_DATABASE_PARAMETERS] = parameters + database_input["Parameters"] = parameters return database_input class GlueCatalog(Catalog): - def __init__(self, name: str, **properties: str): + def __init__(self, name: str, **properties: Any): super().__init__(name, **properties) - session_config = {k: v for k, v in properties.items() if k in BOTO_SESSION_CONFIG_KEYS} - session = boto3.Session(**session_config) - self.glue = session.client(GLUE_CLIENT) + session = boto3.Session( + profile_name=properties.get("profile_name"), + region_name=properties.get("region_name"), + botocore_session=properties.get("botocore_session"), + aws_access_key_id=properties.get("aws_access_key_id"), + aws_secret_access_key=properties.get("aws_secret_access_key"), + aws_session_token=properties.get("aws_session_token"), + ) + self.glue: GlueClient = session.client("glue") + + def _convert_glue_to_iceberg(self, glue_table: TableTypeDef) -> Table: + properties: Properties = glue_table["Parameters"] - def _convert_glue_to_iceberg(self, glue_table: Dict[str, Any]) -> Table: - properties: Properties = glue_table.get(PROP_GLUE_TABLE_PARAMETERS, {}) + assert glue_table["DatabaseName"] + assert glue_table["Parameters"] + database_name = glue_table["DatabaseName"] + table_name = glue_table["Name"] if TABLE_TYPE not in properties: raise NoSuchPropertyException( - f"Property {TABLE_TYPE} missing, could not determine type: " - f"{glue_table[PROP_GLUE_TABLE_DATABASE_NAME]}.{glue_table[PROP_GLUE_TABLE_NAME]}" + f"Property {TABLE_TYPE} missing, could not determine type: {database_name}.{table_name}" ) glue_table_type = properties[TABLE_TYPE] if glue_table_type.lower() != ICEBERG: raise NoSuchIcebergTableError( - f"Property table_type is {glue_table_type}, expected {ICEBERG}: " - f"{glue_table[PROP_GLUE_TABLE_DATABASE_NAME]}.{glue_table[PROP_GLUE_TABLE_NAME]}" + f"Property table_type is {glue_table_type}, expected {ICEBERG}: {database_name}.{table_name}" ) if METADATA_LOCATION not in properties: raise NoSuchPropertyException( - f"Table property {METADATA_LOCATION} is missing, cannot find metadata for: " - f"{glue_table[PROP_GLUE_TABLE_DATABASE_NAME]}.{glue_table[PROP_GLUE_TABLE_NAME]}" + f"Table property {METADATA_LOCATION} is missing, cannot find metadata for: {database_name}.{table_name}" ) metadata_location = properties[METADATA_LOCATION] @@ -166,14 +162,14 @@ def _convert_glue_to_iceberg(self, glue_table: Dict[str, Any]) -> Table: file = io.new_input(metadata_location) metadata = FromInputFile.table_metadata(file) return Table( - identifier=(self.name, glue_table[PROP_GLUE_TABLE_DATABASE_NAME], glue_table[PROP_GLUE_TABLE_NAME]), + 
identifier=(self.name, database_name, table_name), metadata=metadata, metadata_location=metadata_location, io=self._load_file_io(metadata.properties, metadata_location), catalog=self, ) - def _create_glue_table(self, database_name: str, table_name: str, table_input: Dict[str, Any]) -> None: + def _create_glue_table(self, database_name: str, table_name: str, table_input: TableInputTypeDef) -> None: try: self.glue.create_table(DatabaseName=database_name, TableInput=table_input) except self.glue.exceptions.AlreadyExistsException as e: @@ -275,7 +271,7 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table: except self.glue.exceptions.EntityNotFoundException as e: raise NoSuchTableError(f"Table does not exist: {database_name}.{table_name}") from e - return self._convert_glue_to_iceberg(load_table_response.get(PROP_GLUE_TABLE, {})) + return self._convert_glue_to_iceberg(load_table_response["Table"]) def drop_table(self, identifier: Union[str, Identifier]) -> None: """Drop a table. @@ -318,7 +314,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U except self.glue.exceptions.EntityNotFoundException as e: raise NoSuchTableError(f"Table does not exist: {from_database_name}.{from_table_name}") from e - glue_table = get_table_response[PROP_GLUE_TABLE] + glue_table = get_table_response["Table"] try: # verify that from_identifier is a valid iceberg table @@ -405,18 +401,24 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]: NoSuchNamespaceError: If a namespace with the given name does not exist, or the identifier is invalid. """ database_name = self.identifier_to_database(namespace, NoSuchNamespaceError) - table_list = [] + table_list: List[TableTypeDef] = [] + next_token: Optional[str] = None try: table_list_response = self.glue.get_tables(DatabaseName=database_name) - next_token = table_list_response.get(PROP_GLUE_NEXT_TOKEN) - table_list += table_list_response.get(PROP_GLUE_TABLELIST, []) - while next_token: - table_list_response = self.glue.get_tables(DatabaseName=database_name, NextToken=next_token) - next_token = table_list_response.get(PROP_GLUE_NEXT_TOKEN) - table_list += table_list_response.get(PROP_GLUE_TABLELIST, []) + while True: + table_list_response = ( + self.glue.get_tables(DatabaseName=database_name) + if not next_token + else self.glue.get_tables(DatabaseName=database_name, NextToken=next_token) + ) + table_list.extend(table_list_response["TableList"]) + next_token = table_list_response.get("NextToken") + if not next_token: + break + except self.glue.exceptions.EntityNotFoundException as e: raise NoSuchNamespaceError(f"Database does not exist: {database_name}") from e - return [(database_name, table.get(PROP_GLUE_TABLE_NAME)) for table in table_list] + return [(database_name, table["Name"]) for table in table_list] def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identifier]: """List namespaces from the given namespace. If not given, list top-level namespaces from the catalog. 
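[Editorial aside, not part of the patch] The rewritten list_tables (and, in the next hunk, list_namespaces) follows the usual AWS NextToken pagination loop: keep requesting pages until the response no longer carries a NextToken. A minimal standalone sketch of the same pattern, assuming only a boto3 Glue client and a hypothetical database name rather than the GlueCatalog class itself:

    from typing import List, Optional

    import boto3

    def list_glue_table_names(database_name: str) -> List[str]:
        # Hypothetical helper; mirrors the NextToken loop used in GlueCatalog.list_tables above.
        glue = boto3.client("glue")
        names: List[str] = []
        next_token: Optional[str] = None
        while True:
            response = (
                glue.get_tables(DatabaseName=database_name)
                if not next_token
                else glue.get_tables(DatabaseName=database_name, NextToken=next_token)
            )
            names.extend(table["Name"] for table in response["TableList"])
            next_token = response.get("NextToken")
            if not next_token:
                break
        return names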
@@ -428,15 +430,18 @@ def list_namespaces(self, namespace: Union[str, Identifier] = ()) -> List[Identi if namespace: return [] - database_list = [] + database_list: List[DatabaseTypeDef] = [] databases_response = self.glue.get_databases() - next_token = databases_response.get(PROP_GLUE_NEXT_TOKEN) - database_list += databases_response.get(PROP_GLUE_DATABASE_LIST, []) - while next_token: - databases_response = self.glue.get_databases(NextToken=next_token) - next_token = databases_response.get(PROP_GLUE_NEXT_TOKEN) - database_list += databases_response.get(PROP_GLUE_DATABASE_LIST, []) - return [self.identifier_to_tuple(database.get(PROP_GLUE_DATABASE_NAME)) for database in database_list] + next_token: Optional[str] = None + + while True: + databases_response = self.glue.get_databases() if not next_token else self.glue.get_databases(NextToken=next_token) + database_list.extend(databases_response["DatabaseList"]) + next_token = databases_response.get("NextToken") + if not next_token: + break + + return [self.identifier_to_tuple(database["Name"]) for database in database_list] def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Properties: """Get properties for a namespace. @@ -458,13 +463,13 @@ def load_namespace_properties(self, namespace: Union[str, Identifier]) -> Proper except self.glue.exceptions.InvalidInputException as e: raise NoSuchNamespaceError(f"Invalid input for namespace {database_name}") from e - database = database_response[PROP_GLUE_DATABASE] + database = database_response["Database"] - properties = dict(database.get(PROP_GLUE_DATABASE_PARAMETERS, {})) - if database_location := database.get(PROP_GLUE_DATABASE_LOCATION): - properties[LOCATION] = database_location - if database_description := database.get(PROP_GLUE_DATABASE_DESCRIPTION): - properties[GLUE_DESCRIPTION_KEY] = database_description + properties = dict(database.get("Parameters", {})) + if "LocationUri" in database: + properties["location"] = database["LocationUri"] + if "Description" in database: + properties["Description"] = database["Description"] return properties diff --git a/python/pyiceberg/io/pyarrow.py b/python/pyiceberg/io/pyarrow.py index 2cc20549feb6..7f6045abeda4 100644 --- a/python/pyiceberg/io/pyarrow.py +++ b/python/pyiceberg/io/pyarrow.py @@ -297,24 +297,24 @@ def to_input_file(self) -> PyArrowFile: class PyArrowFileIO(FileIO): - fs_by_scheme: Callable[[str], FileSystem] + fs_by_scheme: Callable[[str, Optional[str]], FileSystem] def __init__(self, properties: Properties = EMPTY_DICT): - self.fs_by_scheme: Callable[[str], FileSystem] = lru_cache(self._initialize_fs) + self.fs_by_scheme: Callable[[str, Optional[str]], FileSystem] = lru_cache(self._initialize_fs) super().__init__(properties=properties) @staticmethod - def parse_location(location: str) -> Tuple[str, str]: + def parse_location(location: str) -> Tuple[str, str, str]: """Return the path without the scheme.""" uri = urlparse(location) if not uri.scheme: - return "file", os.path.abspath(location) + return "file", uri.netloc, os.path.abspath(location) elif uri.scheme == "hdfs": - return uri.scheme, location + return uri.scheme, uri.netloc, location else: - return uri.scheme, f"{uri.netloc}{uri.path}" + return uri.scheme, uri.netloc, f"{uri.netloc}{uri.path}" - def _initialize_fs(self, scheme: str) -> FileSystem: + def _initialize_fs(self, scheme: str, netloc: Optional[str] = None) -> FileSystem: if scheme in {"s3", "s3a", "s3n"}: from pyarrow.fs import S3FileSystem @@ -334,6 +334,8 @@ def _initialize_fs(self, scheme: str) 
-> FileSystem: from pyarrow.fs import HadoopFileSystem hdfs_kwargs: Dict[str, Any] = {} + if netloc: + return HadoopFileSystem.from_uri(f"hdfs://{netloc}") if host := self.properties.get(HDFS_HOST): hdfs_kwargs["host"] = host if port := self.properties.get(HDFS_PORT): @@ -377,9 +379,9 @@ def new_input(self, location: str) -> PyArrowFile: Returns: PyArrowFile: A PyArrowFile instance for the given location. """ - scheme, path = self.parse_location(location) + scheme, netloc, path = self.parse_location(location) return PyArrowFile( - fs=self.fs_by_scheme(scheme), + fs=self.fs_by_scheme(scheme, netloc), location=location, path=path, buffer_size=int(self.properties.get(BUFFER_SIZE, ONE_MEGABYTE)), @@ -394,9 +396,9 @@ def new_output(self, location: str) -> PyArrowFile: Returns: PyArrowFile: A PyArrowFile instance for the given location. """ - scheme, path = self.parse_location(location) + scheme, netloc, path = self.parse_location(location) return PyArrowFile( - fs=self.fs_by_scheme(scheme), + fs=self.fs_by_scheme(scheme, netloc), location=location, path=path, buffer_size=int(self.properties.get(BUFFER_SIZE, ONE_MEGABYTE)), @@ -415,8 +417,8 @@ def delete(self, location: Union[str, InputFile, OutputFile]) -> None: an AWS error code 15. """ str_location = location.location if isinstance(location, (InputFile, OutputFile)) else location - scheme, path = self.parse_location(str_location) - fs = self.fs_by_scheme(scheme) + scheme, netloc, path = self.parse_location(str_location) + fs = self.fs_by_scheme(scheme, netloc) try: fs.delete_file(path) @@ -588,7 +590,7 @@ def _get_file_format(file_format: FileFormat, **kwargs: Dict[str, Any]) -> ds.Fi def _construct_fragment(fs: FileSystem, data_file: DataFile, file_format_kwargs: Dict[str, Any] = EMPTY_DICT) -> ds.Fragment: - _, path = PyArrowFileIO.parse_location(data_file.file_path) + _, _, path = PyArrowFileIO.parse_location(data_file.file_path) return _get_file_format(data_file.file_format, **file_format_kwargs).make_fragment(path, fs) @@ -810,7 +812,7 @@ def _task_to_table( if limit and sum(row_counts) >= limit: return None - _, path = PyArrowFileIO.parse_location(task.file.file_path) + _, _, path = PyArrowFileIO.parse_location(task.file.file_path) arrow_format = ds.ParquetFileFormat(pre_buffer=True, buffer_size=(ONE_MEGABYTE * 8)) with fs.open_input_file(path) as fin: fragment = arrow_format.make_fragment(fin) @@ -919,9 +921,9 @@ def project_table( Raises: ResolveError: When an incompatible query is done. 
""" - scheme, _ = PyArrowFileIO.parse_location(table.location()) + scheme, netloc, _ = PyArrowFileIO.parse_location(table.location()) if isinstance(table.io, PyArrowFileIO): - fs = table.io.fs_by_scheme(scheme) + fs = table.io.fs_by_scheme(scheme, netloc) else: try: from pyiceberg.io.fsspec import FsspecFileIO diff --git a/python/pyiceberg/table/__init__.py b/python/pyiceberg/table/__init__.py index b905c955c848..8443315a6484 100644 --- a/python/pyiceberg/table/__init__.py +++ b/python/pyiceberg/table/__init__.py @@ -541,6 +541,16 @@ def __eq__(self, other: Any) -> bool: else False ) + def __repr__(self) -> str: + """Return the string representation of the Table class.""" + table_name = self.catalog.table_name_from(self.identifier) + schema_str = ",\n ".join(str(column) for column in self.schema().columns if self.schema()) + partition_str = f"partition by: [{', '.join(field.name for field in self.spec().fields if self.spec())}]" + sort_order_str = f"sort order: [{', '.join(str(field) for field in self.sort_order().fields if self.sort_order())}]" + snapshot_str = f"snapshot: {str(self.current_snapshot()) if self.current_snapshot() else 'null'}" + result_str = f"{table_name}(\n {schema_str}\n),\n{partition_str},\n{sort_order_str},\n{snapshot_str}" + return result_str + class StaticTable(Table): """Load a table directly from a metadata file (i.e., without using a catalog).""" diff --git a/python/pyproject.toml b/python/pyproject.toml index 4ef77f67221f..ef64f5fb06b8 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -63,6 +63,7 @@ duckdb = { version = ">=0.5.0,<1.0.0", optional = true } ray = { version = ">=2.0.0,<3.0.0", optional = true } python-snappy = { version = ">=0.6.0,<1.0.0", optional = true } thrift = { version = ">=0.13.0,<1.0.0", optional = true } +mypy-boto3-glue = { version = ">=1.28.18", optional = true } boto3 = { version = ">=1.24.59", optional = true } s3fs = { version = ">=2023.1.0,<2024.1.0", optional = true } adlfs = { version = ">=2023.1.0,<2024.1.0", optional = true } @@ -105,7 +106,7 @@ ray = ["ray", "pyarrow", "pandas"] snappy = ["python-snappy"] hive = ["thrift"] s3fs = ["s3fs"] -glue = ["boto3"] +glue = ["boto3", "mypy-boto3-glue"] adlfs = ["adlfs"] dynamodb = ["boto3"] zstandard = ["zstandard"] @@ -236,6 +237,10 @@ ignore_missing_imports = true module = "botocore.*" ignore_missing_imports = true +[[tool.mypy.overrides]] +module = "mypy_boto3_glue.*" +ignore_missing_imports = true + [[tool.mypy.overrides]] module = "moto" ignore_missing_imports = true diff --git a/python/tests/catalog/test_glue.py b/python/tests/catalog/test_glue.py index 2bb90e41a5a8..1d7027a216f2 100644 --- a/python/tests/catalog/test_glue.py +++ b/python/tests/catalog/test_glue.py @@ -14,7 +14,7 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. 
-from typing import List +from typing import Any, Dict, List from unittest import mock import pytest @@ -441,12 +441,13 @@ def test_update_namespace_properties_overlap_update_removal( @mock_glue def test_passing_profile_name() -> None: - session_properties = { + session_properties: Dict[str, Any] = { "aws_access_key_id": "abc", "aws_secret_access_key": "def", "aws_session_token": "ghi", "region_name": "eu-central-1", "profile_name": "sandbox", + "botocore_session": None, } test_properties = {"type": "glue"} test_properties.update(session_properties) diff --git a/python/tests/io/test_pyarrow.py b/python/tests/io/test_pyarrow.py index 49e1c8bca8c0..8b622125932d 100644 --- a/python/tests/io/test_pyarrow.py +++ b/python/tests/io/test_pyarrow.py @@ -1529,17 +1529,16 @@ def test_writing_avro_file_gcs(generated_manifest_entry_file: str, pyarrow_filei pyarrow_fileio_gcs.delete(f"gs://warehouse/{filename}") -def test_parse_hdfs_location() -> None: - locations = ["hdfs://127.0.0.1:9000/root/foo.txt", "hdfs://127.0.0.1/root/foo.txt"] - for location in locations: - schema, path = PyArrowFileIO.parse_location(location) - assert schema == "hdfs" - assert location == path - - -def test_parse_local_location() -> None: - locations = ["/root/foo.txt", "/root/tmp/foo.txt"] - for location in locations: - schema, path = PyArrowFileIO.parse_location(location) - assert schema == "file" - assert location == path +def test_parse_location() -> None: + def check_results(location: str, expected_schema: str, expected_netloc: str, expected_uri: str) -> None: + schema, netloc, uri = PyArrowFileIO.parse_location(location) + assert schema == expected_schema + assert netloc == expected_netloc + assert uri == expected_uri + + check_results("hdfs://127.0.0.1:9000/root/foo.txt", "hdfs", "127.0.0.1:9000", "hdfs://127.0.0.1:9000/root/foo.txt") + check_results("hdfs://127.0.0.1/root/foo.txt", "hdfs", "127.0.0.1", "hdfs://127.0.0.1/root/foo.txt") + check_results("hdfs://clusterA/root/foo.txt", "hdfs", "clusterA", "hdfs://clusterA/root/foo.txt") + + check_results("/root/foo.txt", "file", "", "/root/foo.txt") + check_results("/root/tmp/foo.txt", "file", "", "/root/tmp/foo.txt") diff --git a/python/tests/table/test_init.py b/python/tests/table/test_init.py index 3ee0cd37f2d9..8fd5e2bcdbeb 100644 --- a/python/tests/table/test_init.py +++ b/python/tests/table/test_init.py @@ -193,6 +193,18 @@ def test_snapshot_by_name_does_not_exist(table: Table) -> None: assert table.snapshot_by_name("doesnotexist") is None +def test_repr(table: Table) -> None: + expected = """table( + 1: x: required long, + 2: y: required long (comment), + 3: z: required long +), +partition by: [x], +sort order: [2 ASC NULLS FIRST, bucket[4](3) DESC NULLS LAST], +snapshot: Operation.APPEND: id=3055729675574597004, parent_id=3051729675574597004, schema_id=1""" + assert repr(table) == expected + + def test_history(table: Table) -> None: assert table.history() == [ SnapshotLogEntry(snapshot_id=3051729675574597004, timestamp_ms=1515100955770), diff --git a/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java b/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java index d8c415b28120..f443b6f8f18f 100644 --- a/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java +++ b/spark/v3.1/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java @@ -34,6 +34,7 @@ 
import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.analysis.NoSuchProcedureException; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Test; @@ -412,6 +413,26 @@ public void testInvalidCasesForRewriteDataFiles() { () -> sql("CALL %s.system.rewrite_data_files('')", catalogName)); } + @Test + public void testRewriteWithUntranslatedOrUnconvertedFilter() { + createTable(); + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'lower(c2) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot translate Spark expression"); + + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'c2 like \"%%fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot convert Spark filter"); + } + private void createTable() { sql("CREATE TABLE %s (c1 int, c2 string, c3 string) USING iceberg", tableName); } diff --git a/spark/v3.1/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala b/spark/v3.1/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala index 60de1cb82a52..077884b1a66e 100644 --- a/spark/v3.1/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala +++ b/spark/v3.1/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala @@ -31,7 +31,17 @@ object SparkExpressionConverter { // Currently, it is a double conversion as we are converting Spark expression to Spark filter // and then converting Spark filter to Iceberg expression. // But these two conversions already exist and well tested. So, we are going with this approach. 
- SparkFilters.convert(DataSourceStrategy.translateFilter(sparkExpression, supportNestedPredicatePushdown = true).get) + DataSourceStrategy.translateFilter(sparkExpression, supportNestedPredicatePushdown = true) match { + case Some(filter) => + val converted = SparkFilters.convert(filter) + if (converted == null) { + throw new IllegalArgumentException(s"Cannot convert Spark filter: $filter to Iceberg expression") + } + + converted + case _ => + throw new IllegalArgumentException(s"Cannot translate Spark expression: $sparkExpression to data source filter") + } } @throws[AnalysisException] diff --git a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java index d13e0967b638..ba838a4a983f 100644 --- a/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java +++ b/spark/v3.2/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java @@ -41,6 +41,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.analysis.NoSuchProcedureException; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Assume; @@ -656,6 +657,26 @@ public void testDefaultSortOrder() { assertEquals("Data after compaction should not change", expectedRecords, actualRecords); } + @Test + public void testRewriteWithUntranslatedOrUnconvertedFilter() { + createTable(); + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'lower(c2) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot translate Spark expression"); + + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'c2 like \"%%fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot convert Spark filter"); + } + private void createTable() { sql("CREATE TABLE %s (c1 int, c2 string, c3 string) USING iceberg", tableName); } diff --git a/spark/v3.2/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala b/spark/v3.2/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala index c4b5a7c0ce14..4ec46ee1401e 100644 --- a/spark/v3.2/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala +++ b/spark/v3.2/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala @@ -32,7 +32,17 @@ object SparkExpressionConverter { // Currently, it is a double conversion as we are converting Spark expression to Spark filter // and then converting Spark filter to Iceberg expression. // But these two conversions already exist and well tested. So, we are going with this approach. 
- SparkFilters.convert(DataSourceStrategy.translateFilter(sparkExpression, supportNestedPredicatePushdown = true).get) + DataSourceStrategy.translateFilter(sparkExpression, supportNestedPredicatePushdown = true) match { + case Some(filter) => + val converted = SparkFilters.convert(filter) + if (converted == null) { + throw new IllegalArgumentException(s"Cannot convert Spark filter: $filter to Iceberg expression") + } + + converted + case _ => + throw new IllegalArgumentException(s"Cannot translate Spark expression: $sparkExpression to data source filter") + } } @throws[AnalysisException] diff --git a/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java b/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java index 78fe78742b45..0cdde158bde3 100644 --- a/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java +++ b/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java @@ -41,6 +41,7 @@ import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.apache.spark.sql.catalyst.analysis.NoSuchProcedureException; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Assume; @@ -699,6 +700,26 @@ public void testDefaultSortOrder() { assertEquals("Data after compaction should not change", expectedRecords, actualRecords); } + @Test + public void testRewriteWithUntranslatedOrUnconvertedFilter() { + createTable(); + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'lower(c2) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot translate Spark expression"); + + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'c2 like \"%%fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot convert Spark filter"); + } + private void createTable() { sql("CREATE TABLE %s (c1 int, c2 string, c3 string) USING iceberg", tableName); } diff --git a/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 0bc2bb99611e..481e2f01f23b 100644 --- a/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v3.3/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -29,6 +29,7 @@ import org.apache.iceberg.spark.data.TestHelpers; import org.apache.iceberg.spark.source.SimpleRecord; import org.apache.spark.sql.Encoders; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Test; @@ -202,6 +203,26 @@ public void testInvalidOption() throws Exception { catalogName, tableIdent)); } + @Test + public void testRewriteWithUntranslatedOrUnconvertedFilter() throws Exception { + createTable(); + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_position_delete_files(table => '%s', where => 'lower(data) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + 
.hasMessageContaining("Cannot translate Spark expression"); + + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_position_delete_files(table => '%s', where => 'data like \"%%fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot convert Spark filter"); + } + private Map snapshotSummary() { return validationCatalog.loadTable(tableIdent).currentSnapshot().summary(); } diff --git a/spark/v3.3/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala b/spark/v3.3/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala index 9f53eae60aba..7f6641e1b2bf 100644 --- a/spark/v3.3/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala +++ b/spark/v3.3/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala @@ -35,7 +35,17 @@ object SparkExpressionConverter { // Currently, it is a double conversion as we are converting Spark expression to Spark filter // and then converting Spark filter to Iceberg expression. // But these two conversions already exist and well tested. So, we are going with this approach. - SparkFilters.convert(DataSourceStrategy.translateFilter(sparkExpression, supportNestedPredicatePushdown = true).get) + DataSourceStrategy.translateFilter(sparkExpression, supportNestedPredicatePushdown = true) match { + case Some(filter) => + val converted = SparkFilters.convert(filter) + if (converted == null) { + throw new IllegalArgumentException(s"Cannot convert Spark filter: $filter to Iceberg expression") + } + + converted + case _ => + throw new IllegalArgumentException(s"Cannot translate Spark expression: $sparkExpression to data source filter") + } } @throws[AnalysisException] diff --git a/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java b/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java index 416d5eed5b65..5497017460e2 100644 --- a/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java +++ b/spark/v3.3/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java @@ -43,6 +43,7 @@ import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.spark.SparkTestBaseWithCatalog; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.junit.After; @@ -129,7 +130,7 @@ private Table createPrimitiveTable() throws IOException { return table; } - private void createNestedTable() throws IOException { + private Pair createNestedTable() throws IOException { Table table = catalog.createTable( TableIdentifier.of(Namespace.of(database()), tableName()), @@ -145,6 +146,7 @@ private void createNestedTable() throws IOException { DataFile dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records); table.newAppend().appendFile(dataFile).commit(); + return Pair.of(table, dataFile); } @After @@ -324,15 +326,22 @@ public void testSelectNestedValues() throws Exception { @Test public void testNestedValues() throws Exception { - createNestedTable(); - - Object[] leafDoubleCol = row(53L, 3L, 1L, 1L, 0.0D, 0.0D); - Object[] leafLongCol = row(54L, 3L, 1L, null, 0L, 1L); + Pair table = createNestedTable(); + int longColId = + 
table.first().schema().findField("nestedStructCol.leafStructCol.leafLongCol").fieldId(); + int doubleColId = + table.first().schema().findField("nestedStructCol.leafStructCol.leafDoubleCol").fieldId(); + + Object[] leafDoubleCol = + row(table.second().columnSizes().get(doubleColId), 3L, 1L, 1L, 0.0D, 0.0D); + Object[] leafLongCol = row(table.second().columnSizes().get(longColId), 3L, 1L, null, 0L, 1L); Object[] metrics = row(leafDoubleCol, leafLongCol); - assertEquals( - "Row should match", - ImmutableList.of(new Object[] {metrics}), - sql("SELECT readable_metrics FROM %s.files", tableName)); + List expected = ImmutableList.of(new Object[] {metrics}); + String sql = "SELECT readable_metrics FROM %s.%s"; + List filesReadableMetrics = sql(String.format(sql, tableName, "files")); + List entriesReadableMetrics = sql(String.format(sql, tableName, "entries")); + assertEquals("Row should match for files table", expected, filesReadableMetrics); + assertEquals("Row should match for entries table", expected, entriesReadableMetrics); } } diff --git a/spark/v3.4/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java b/spark/v3.4/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java index d4c2848b45e6..43ce2a303e2b 100644 --- a/spark/v3.4/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java +++ b/spark/v3.4/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java @@ -29,6 +29,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.ClosingIterator; import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.metrics.MetricsReporter; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -81,7 +82,7 @@ public class SparkDistributedDataScan extends BaseDistributedDataScan { private Broadcast
tableBroadcast = null; public SparkDistributedDataScan(SparkSession spark, Table table, SparkReadConf readConf) { - this(spark, table, readConf, table.schema(), TableScanContext.empty()); + this(spark, table, readConf, table.schema(), newTableScanContext(table)); } private SparkDistributedDataScan( @@ -134,6 +135,10 @@ private Iterable> doPlanDataRemotely( .flatMap(new ReadDataManifest(tableBroadcast(), context(), withColumnStats)); List> dataFileGroups = collectPartitions(dataFileRDD); + int matchingFilesCount = dataFileGroups.stream().mapToInt(List::size).sum(); + int skippedFilesCount = liveFilesCount(dataManifests) - matchingFilesCount; + scanMetrics().skippedDataFiles().increment(skippedFilesCount); + return Iterables.transform(dataFileGroups, CloseableIterable::withNoopClose); } @@ -157,6 +162,9 @@ private DeleteFileIndex doPlanDeletesRemotely(List deleteManifests .flatMap(new ReadDeleteManifest(tableBroadcast(), context())) .collect(); + int skippedFilesCount = liveFilesCount(deleteManifests) - deleteFiles.size(); + scanMetrics().skippedDeleteFiles().increment(skippedFilesCount); + return DeleteFileIndex.builderFor(deleteFiles) .specsById(table().specs()) .caseSensitive(isCaseSensitive()) @@ -193,6 +201,23 @@ private List> collectPartitions(JavaRDD rdd) { return Arrays.asList(rdd.collectPartitions(partitionIds)); } + private int liveFilesCount(List manifests) { + return manifests.stream().mapToInt(this::liveFilesCount).sum(); + } + + private int liveFilesCount(ManifestFile manifest) { + return manifest.existingFilesCount() + manifest.addedFilesCount(); + } + + private static TableScanContext newTableScanContext(Table table) { + if (table instanceof BaseTable) { + MetricsReporter reporter = ((BaseTable) table).reporter(); + return ImmutableTableScanContext.builder().metricsReporter(reporter).build(); + } else { + return TableScanContext.empty(); + } + } + private static class ReadDataManifest implements FlatMapFunction { private final Broadcast
table; diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/TestSparkDistributedDataScanReporting.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/TestSparkDistributedDataScanReporting.java new file mode 100644 index 000000000000..1ea4f990b272 --- /dev/null +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/TestSparkDistributedDataScanReporting.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.iceberg; + +import static org.apache.iceberg.PlanningMode.DISTRIBUTED; +import static org.apache.iceberg.PlanningMode.LOCAL; + +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.spark.SparkReadConf; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.internal.SQLConf; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSparkDistributedDataScanReporting + extends ScanPlanningAndReportingTestBase> { + + @Parameterized.Parameters(name = "dataMode = {0}, deleteMode = {1}") + public static Object[] parameters() { + return new Object[][] { + new Object[] {LOCAL, LOCAL}, + new Object[] {LOCAL, DISTRIBUTED}, + new Object[] {DISTRIBUTED, LOCAL}, + new Object[] {DISTRIBUTED, DISTRIBUTED} + }; + } + + private static SparkSession spark = null; + + private final PlanningMode dataMode; + private final PlanningMode deleteMode; + + public TestSparkDistributedDataScanReporting( + PlanningMode dataPlanningMode, PlanningMode deletePlanningMode) { + this.dataMode = dataPlanningMode; + this.deleteMode = deletePlanningMode; + } + + @BeforeClass + public static void startSpark() { + TestSparkDistributedDataScanReporting.spark = + SparkSession.builder() + .master("local[2]") + .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(SQLConf.SHUFFLE_PARTITIONS().key(), "4") + .getOrCreate(); + } + + @AfterClass + public static void stopSpark() { + SparkSession currentSpark = TestSparkDistributedDataScanReporting.spark; + TestSparkDistributedDataScanReporting.spark = null; + currentSpark.stop(); + } + + @Override + protected BatchScan newScan(Table table) { + table + .updateProperties() + .set(TableProperties.DATA_PLANNING_MODE, dataMode.modeName()) + .set(TableProperties.DELETE_PLANNING_MODE, deleteMode.modeName()) + .commit(); + SparkReadConf readConf = new SparkReadConf(spark, table, ImmutableMap.of()); + return new SparkDistributedDataScan(spark, table, readConf); + } +} diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java 
b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java index f65da4574284..9075257fa9f1 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java @@ -43,6 +43,7 @@ import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.spark.SparkTestBaseWithCatalog; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.junit.After; @@ -129,7 +130,7 @@ private Table createPrimitiveTable() throws IOException { return table; } - private void createNestedTable() throws IOException { + private Pair createNestedTable() throws IOException { Table table = catalog.createTable( TableIdentifier.of(Namespace.of(database()), tableName()), @@ -145,6 +146,7 @@ private void createNestedTable() throws IOException { DataFile dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records); table.newAppend().appendFile(dataFile).commit(); + return Pair.of(table, dataFile); } @After @@ -351,10 +353,15 @@ public void testSelectNestedValues() throws Exception { @Test public void testNestedValues() throws Exception { - createNestedTable(); - - Object[] leafDoubleCol = row(53L, 3L, 1L, 1L, 0.0D, 0.0D); - Object[] leafLongCol = row(54L, 3L, 1L, null, 0L, 1L); + Pair table = createNestedTable(); + int longColId = + table.first().schema().findField("nestedStructCol.leafStructCol.leafLongCol").fieldId(); + int doubleColId = + table.first().schema().findField("nestedStructCol.leafStructCol.leafDoubleCol").fieldId(); + + Object[] leafDoubleCol = + row(table.second().columnSizes().get(doubleColId), 3L, 1L, 1L, 0.0D, 0.0D); + Object[] leafLongCol = row(table.second().columnSizes().get(longColId), 3L, 1L, null, 0L, 1L); Object[] metrics = row(leafDoubleCol, leafLongCol); List expected = ImmutableList.of(new Object[] {metrics}); diff --git a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java index cadcbad6aa76..d5ea85b335ab 100644 --- a/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java +++ b/spark/v3.4/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java @@ -38,6 +38,7 @@ import org.apache.iceberg.Files; import org.apache.iceberg.MetadataColumns; import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PlanningMode; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; @@ -97,19 +98,21 @@ public class TestSparkReaderDeletes extends DeleteReadTests { protected static HiveCatalog catalog = null; private final String format; private final boolean vectorized; + private final PlanningMode planningMode; - public TestSparkReaderDeletes(String format, boolean vectorized) { + public TestSparkReaderDeletes(String format, boolean vectorized, PlanningMode planningMode) { this.format = format; this.vectorized = vectorized; + this.planningMode = planningMode; } - @Parameterized.Parameters(name = "format = {0}, vectorized = {1}") + @Parameterized.Parameters(name = "format = {0}, vectorized = {1}, planningMode = {2}") public static Object[][] parameters() { return new Object[][] { - new Object[] {"parquet", false}, - new Object[] 
{"parquet", true}, - new Object[] {"orc", false}, - new Object[] {"avro", false} + new Object[] {"parquet", false, PlanningMode.DISTRIBUTED}, + new Object[] {"parquet", true, PlanningMode.LOCAL}, + new Object[] {"orc", false, PlanningMode.DISTRIBUTED}, + new Object[] {"avro", false, PlanningMode.LOCAL} }; } @@ -162,7 +165,12 @@ protected Table createTable(String name, Schema schema, PartitionSpec spec) { TableOperations ops = ((BaseTable) table).operations(); TableMetadata meta = ops.current(); ops.commit(meta, meta.upgradeToFormatVersion(2)); - table.updateProperties().set(TableProperties.DEFAULT_FILE_FORMAT, format).commit(); + table + .updateProperties() + .set(TableProperties.DEFAULT_FILE_FORMAT, format) + .set(TableProperties.DATA_PLANNING_MODE, planningMode.modeName()) + .set(TableProperties.DELETE_PLANNING_MODE, planningMode.modeName()) + .commit(); if (format.equals("parquet") || format.equals("orc")) { String vectorizationEnabled = format.equals("parquet") diff --git a/spark/v3.5/spark-extensions/src/jmh/java/org/apache/iceberg/spark/PlanningBenchmark.java b/spark/v3.5/spark-extensions/src/jmh/java/org/apache/iceberg/spark/PlanningBenchmark.java index 4029b5ff3be9..e2ce5e956348 100644 --- a/spark/v3.5/spark-extensions/src/jmh/java/org/apache/iceberg/spark/PlanningBenchmark.java +++ b/spark/v3.5/spark-extensions/src/jmh/java/org/apache/iceberg/spark/PlanningBenchmark.java @@ -18,6 +18,8 @@ */ package org.apache.iceberg.spark; +import static org.apache.iceberg.PlanningMode.DISTRIBUTED; +import static org.apache.iceberg.PlanningMode.LOCAL; import static org.apache.spark.sql.functions.lit; import com.google.errorprone.annotations.FormatMethod; @@ -35,16 +37,19 @@ import org.apache.iceberg.DeleteFile; import org.apache.iceberg.FileMetadata; import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PlanningMode; import org.apache.iceberg.RowDelta; import org.apache.iceberg.RowLevelOperationMode; import org.apache.iceberg.ScanTask; import org.apache.iceberg.Schema; +import org.apache.iceberg.SparkDistributedDataScan; import org.apache.iceberg.Table; import org.apache.iceberg.TableProperties; import org.apache.iceberg.expressions.Expression; import org.apache.iceberg.expressions.Expressions; import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.LocationProvider; +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; import org.apache.iceberg.spark.data.RandomData; @@ -128,28 +133,88 @@ public void tearDownBenchmark() { @Benchmark @Threads(1) public void localPlanningWithPartitionAndMinMaxFilter(Blackhole blackhole) { - List fileTasks = planFilesWithoutColumnStats(PARTITION_AND_SORT_KEY_PREDICATE); + BatchScan scan = table.newBatchScan(); + List fileTasks = planFilesWithoutColumnStats(scan, PARTITION_AND_SORT_KEY_PREDICATE); + blackhole.consume(fileTasks); + } + + @Benchmark + @Threads(1) + public void distributedPlanningWithPartitionAndMinMaxFilter(Blackhole blackhole) { + BatchScan scan = newDistributedScan(DISTRIBUTED, DISTRIBUTED); + List fileTasks = planFilesWithoutColumnStats(scan, PARTITION_AND_SORT_KEY_PREDICATE); blackhole.consume(fileTasks); } @Benchmark @Threads(1) public void localPlanningWithMinMaxFilter(Blackhole blackhole) { - List fileTasks = planFilesWithoutColumnStats(SORT_KEY_PREDICATE); + BatchScan scan = table.newBatchScan(); + List fileTasks = 
planFilesWithoutColumnStats(scan, SORT_KEY_PREDICATE); + blackhole.consume(fileTasks); + } + + @Benchmark + @Threads(1) + public void distributedPlanningWithMinMaxFilter(Blackhole blackhole) { + BatchScan scan = newDistributedScan(DISTRIBUTED, DISTRIBUTED); + List fileTasks = planFilesWithoutColumnStats(scan, SORT_KEY_PREDICATE); blackhole.consume(fileTasks); } @Benchmark @Threads(1) public void localPlanningWithoutFilter(Blackhole blackhole) { - List fileTasks = planFilesWithoutColumnStats(Expressions.alwaysTrue()); + BatchScan scan = table.newBatchScan(); + List fileTasks = planFilesWithoutColumnStats(scan, Expressions.alwaysTrue()); + blackhole.consume(fileTasks); + } + + @Benchmark + @Threads(1) + public void distributedPlanningWithoutFilter(Blackhole blackhole) { + BatchScan scan = newDistributedScan(DISTRIBUTED, DISTRIBUTED); + List fileTasks = planFilesWithoutColumnStats(scan, Expressions.alwaysTrue()); blackhole.consume(fileTasks); } @Benchmark @Threads(1) public void localPlanningWithoutFilterWithStats(Blackhole blackhole) { - List fileTasks = planFilesWithColumnStats(Expressions.alwaysTrue()); + BatchScan scan = table.newBatchScan(); + List fileTasks = planFilesWithColumnStats(scan, Expressions.alwaysTrue()); + blackhole.consume(fileTasks); + } + + @Benchmark + @Threads(1) + public void distributedPlanningWithoutFilterWithStats(Blackhole blackhole) { + BatchScan scan = newDistributedScan(DISTRIBUTED, DISTRIBUTED); + List fileTasks = planFilesWithColumnStats(scan, Expressions.alwaysTrue()); + blackhole.consume(fileTasks); + } + + @Benchmark + @Threads(1) + public void distributedDataLocalDeletesPlanningWithoutFilterWithStats(Blackhole blackhole) { + BatchScan scan = newDistributedScan(DISTRIBUTED, LOCAL); + List fileTasks = planFilesWithColumnStats(scan, Expressions.alwaysTrue()); + blackhole.consume(fileTasks); + } + + @Benchmark + @Threads(1) + public void localDataDistributedDeletesPlanningWithoutFilterWithStats(Blackhole blackhole) { + BatchScan scan = newDistributedScan(LOCAL, DISTRIBUTED); + List fileTasks = planFilesWithColumnStats(scan, Expressions.alwaysTrue()); + blackhole.consume(fileTasks); + } + + @Benchmark + @Threads(1) + public void localPlanningViaDistributedScanWithoutFilterWithStats(Blackhole blackhole) { + BatchScan scan = newDistributedScan(LOCAL, LOCAL); + List fileTasks = planFilesWithColumnStats(scan, Expressions.alwaysTrue()); blackhole.consume(fileTasks); } @@ -158,6 +223,7 @@ private void setupSpark() { SparkSession.builder() .config("spark.ui.enabled", false) .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config("spark.driver.maxResultSize", "8G") .config("spark.sql.extensions", IcebergSparkSessionExtensions.class.getName()) .config("spark.sql.catalog.spark_catalog", SparkSessionCatalog.class.getName()) .config("spark.sql.catalog.spark_catalog.type", "hadoop") @@ -312,30 +378,40 @@ private Dataset randomDataDF(Schema schema, int numRows) { return spark.internalCreateDataFrame(JavaRDD.toRDD(rowRDD), rowSparkType, false); } - private List planFilesWithoutColumnStats(Expression predicate) { - return planFiles(predicate, false); + private List planFilesWithoutColumnStats(BatchScan scan, Expression predicate) { + return planFiles(scan, predicate, false); } - private List planFilesWithColumnStats(Expression predicate) { - return planFiles(predicate, true); + private List planFilesWithColumnStats(BatchScan scan, Expression predicate) { + return planFiles(scan, predicate, true); } - private List planFiles(Expression predicate, 
boolean withColumnStats) { + private List planFiles(BatchScan scan, Expression predicate, boolean withColumnStats) { table.refresh(); - BatchScan scan = table.newBatchScan().filter(predicate); + BatchScan configuredScan = scan.filter(predicate); if (withColumnStats) { - scan.includeColumnStats(); + configuredScan = configuredScan.includeColumnStats(); } - try (CloseableIterable fileTasks = scan.planFiles()) { + try (CloseableIterable fileTasks = configuredScan.planFiles()) { return Lists.newArrayList(fileTasks); } catch (IOException e) { throw new UncheckedIOException(e); } } + private BatchScan newDistributedScan(PlanningMode dataMode, PlanningMode deleteMode) { + table + .updateProperties() + .set(TableProperties.DATA_PLANNING_MODE, dataMode.modeName()) + .set(TableProperties.DELETE_PLANNING_MODE, deleteMode.modeName()) + .commit(); + SparkReadConf readConf = new SparkReadConf(spark, table, ImmutableMap.of()); + return new SparkDistributedDataScan(spark, table, readConf); + } + @FormatMethod private void sql(@FormatString String query, Object... args) { spark.sql(String.format(query, args)); diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java index 2449c20ab9a9..25e506a85a7f 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewriteDataFilesProcedure.java @@ -828,6 +828,26 @@ public void testDefaultSortOrder() { assertEquals("Data after compaction should not change", expectedRecords, actualRecords); } + @Test + public void testRewriteWithUntranslatedOrUnconvertedFilter() { + createTable(); + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'substr(encode(c2, \"utf-8\"), 2) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot translate Spark expression"); + + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_data_files(table => '%s', where => 'substr(c2, 2) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot convert Spark filter"); + } + private void createTable() { sql("CREATE TABLE %s (c1 int, c2 string, c3 string) USING iceberg", tableName); } diff --git a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java index 0bc2bb99611e..5dde5d698ee4 100644 --- a/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java +++ b/spark/v3.5/spark-extensions/src/test/java/org/apache/iceberg/spark/extensions/TestRewritePositionDeleteFilesProcedure.java @@ -29,6 +29,7 @@ import org.apache.iceberg.spark.data.TestHelpers; import org.apache.iceberg.spark.source.SimpleRecord; import org.apache.spark.sql.Encoders; +import org.assertj.core.api.Assertions; import org.junit.After; import org.junit.Assert; import org.junit.Test; @@ -202,6 +203,26 @@ public void testInvalidOption() throws Exception { catalogName, tableIdent)); } + @Test + public void testRewriteWithUntranslatedOrUnconvertedFilter() throws
Exception { + createTable(); + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_position_delete_files(table => '%s', where => 'substr(encode(data, \"utf-8\"), 2) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot translate Spark expression"); + + Assertions.assertThatThrownBy( + () -> + sql( + "CALL %s.system.rewrite_position_delete_files(table => '%s', where => 'substr(data, 2) = \"fo\"')", + catalogName, tableIdent)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("Cannot convert Spark filter"); + } + private Map snapshotSummary() { return validationCatalog.loadTable(tableIdent).currentSnapshot().summary(); } diff --git a/spark/v3.5/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java b/spark/v3.5/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java index d4c2848b45e6..43ce2a303e2b 100644 --- a/spark/v3.5/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java +++ b/spark/v3.5/spark/src/main/java/org/apache/iceberg/SparkDistributedDataScan.java @@ -29,6 +29,7 @@ import org.apache.iceberg.io.CloseableIterable; import org.apache.iceberg.io.ClosingIterator; import org.apache.iceberg.io.FileIO; +import org.apache.iceberg.metrics.MetricsReporter; import org.apache.iceberg.relocated.com.google.common.base.Joiner; import org.apache.iceberg.relocated.com.google.common.collect.Iterables; import org.apache.iceberg.relocated.com.google.common.collect.Lists; @@ -81,7 +82,7 @@ public class SparkDistributedDataScan extends BaseDistributedDataScan { private Broadcast
tableBroadcast = null; public SparkDistributedDataScan(SparkSession spark, Table table, SparkReadConf readConf) { - this(spark, table, readConf, table.schema(), TableScanContext.empty()); + this(spark, table, readConf, table.schema(), newTableScanContext(table)); } private SparkDistributedDataScan( @@ -134,6 +135,10 @@ private Iterable> doPlanDataRemotely( .flatMap(new ReadDataManifest(tableBroadcast(), context(), withColumnStats)); List> dataFileGroups = collectPartitions(dataFileRDD); + int matchingFilesCount = dataFileGroups.stream().mapToInt(List::size).sum(); + int skippedFilesCount = liveFilesCount(dataManifests) - matchingFilesCount; + scanMetrics().skippedDataFiles().increment(skippedFilesCount); + return Iterables.transform(dataFileGroups, CloseableIterable::withNoopClose); } @@ -157,6 +162,9 @@ private DeleteFileIndex doPlanDeletesRemotely(List deleteManifests .flatMap(new ReadDeleteManifest(tableBroadcast(), context())) .collect(); + int skippedFilesCount = liveFilesCount(deleteManifests) - deleteFiles.size(); + scanMetrics().skippedDeleteFiles().increment(skippedFilesCount); + return DeleteFileIndex.builderFor(deleteFiles) .specsById(table().specs()) .caseSensitive(isCaseSensitive()) @@ -193,6 +201,23 @@ private List> collectPartitions(JavaRDD rdd) { return Arrays.asList(rdd.collectPartitions(partitionIds)); } + private int liveFilesCount(List manifests) { + return manifests.stream().mapToInt(this::liveFilesCount).sum(); + } + + private int liveFilesCount(ManifestFile manifest) { + return manifest.existingFilesCount() + manifest.addedFilesCount(); + } + + private static TableScanContext newTableScanContext(Table table) { + if (table instanceof BaseTable) { + MetricsReporter reporter = ((BaseTable) table).reporter(); + return ImmutableTableScanContext.builder().metricsReporter(reporter).build(); + } else { + return TableScanContext.empty(); + } + } + private static class ReadDataManifest implements FlatMapFunction { private final Broadcast
table; diff --git a/spark/v3.5/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala b/spark/v3.5/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala index 4903a100f97f..d6f45657be28 100644 --- a/spark/v3.5/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala +++ b/spark/v3.5/spark/src/main/scala/org/apache/spark/sql/execution/datasources/SparkExpressionConverter.scala @@ -36,7 +36,17 @@ object SparkExpressionConverter { // Currently, it is a double conversion as we are converting Spark expression to Spark predicate // and then converting Spark predicate to Iceberg expression. // But these two conversions already exist and well tested. So, we are going with this approach. - SparkV2Filters.convert(DataSourceV2Strategy.translateFilterV2(sparkExpression).get) + DataSourceV2Strategy.translateFilterV2(sparkExpression) match { + case Some(filter) => + val converted = SparkV2Filters.convert(filter) + if (converted == null) { + throw new IllegalArgumentException(s"Cannot convert Spark filter: $filter to Iceberg expression") + } + + converted + case _ => + throw new IllegalArgumentException(s"Cannot translate Spark expression: $sparkExpression to data source filter") + } } @throws[AnalysisException] diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/TestSparkDistributedDataScanReporting.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/TestSparkDistributedDataScanReporting.java new file mode 100644 index 000000000000..1ea4f990b272 --- /dev/null +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/TestSparkDistributedDataScanReporting.java @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
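The SparkExpressionConverter change above replaces a bare Option.get with two explicit failure modes: the Spark expression may not translate to a DataSourceV2 predicate at all ("Cannot translate Spark expression"), or the translated predicate may not convert to an Iceberg expression, in which case SparkV2Filters.convert returns null ("Cannot convert Spark filter"). A minimal Java sketch of how a caller might surface the two messages; the method name convertToIcebergExpression and the wrapper class are assumptions for illustration, not part of the patch:

import org.apache.iceberg.expressions.Expression;
import org.apache.spark.sql.execution.datasources.SparkExpressionConverter;

final class WhereClauseConversion {
  private WhereClauseConversion() {}

  // Converts a resolved Spark Catalyst expression to an Iceberg filter expression and
  // rewraps the two IllegalArgumentException messages introduced by the change above.
  static Expression toIcebergFilter(org.apache.spark.sql.catalyst.expressions.Expression resolved) {
    try {
      return SparkExpressionConverter.convertToIcebergExpression(resolved);
    } catch (IllegalArgumentException e) {
      // "Cannot translate Spark expression ..." -> no DataSourceV2 predicate form exists
      // "Cannot convert Spark filter ..." -> a predicate exists but Iceberg cannot express it
      throw new IllegalArgumentException("Unsupported where clause: " + e.getMessage(), e);
    }
  }
}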
+ */ +package org.apache.iceberg; + +import static org.apache.iceberg.PlanningMode.DISTRIBUTED; +import static org.apache.iceberg.PlanningMode.LOCAL; + +import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap; +import org.apache.iceberg.spark.SparkReadConf; +import org.apache.spark.sql.SparkSession; +import org.apache.spark.sql.internal.SQLConf; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@RunWith(Parameterized.class) +public class TestSparkDistributedDataScanReporting + extends ScanPlanningAndReportingTestBase> { + + @Parameterized.Parameters(name = "dataMode = {0}, deleteMode = {1}") + public static Object[] parameters() { + return new Object[][] { + new Object[] {LOCAL, LOCAL}, + new Object[] {LOCAL, DISTRIBUTED}, + new Object[] {DISTRIBUTED, LOCAL}, + new Object[] {DISTRIBUTED, DISTRIBUTED} + }; + } + + private static SparkSession spark = null; + + private final PlanningMode dataMode; + private final PlanningMode deleteMode; + + public TestSparkDistributedDataScanReporting( + PlanningMode dataPlanningMode, PlanningMode deletePlanningMode) { + this.dataMode = dataPlanningMode; + this.deleteMode = deletePlanningMode; + } + + @BeforeClass + public static void startSpark() { + TestSparkDistributedDataScanReporting.spark = + SparkSession.builder() + .master("local[2]") + .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer") + .config(SQLConf.SHUFFLE_PARTITIONS().key(), "4") + .getOrCreate(); + } + + @AfterClass + public static void stopSpark() { + SparkSession currentSpark = TestSparkDistributedDataScanReporting.spark; + TestSparkDistributedDataScanReporting.spark = null; + currentSpark.stop(); + } + + @Override + protected BatchScan newScan(Table table) { + table + .updateProperties() + .set(TableProperties.DATA_PLANNING_MODE, dataMode.modeName()) + .set(TableProperties.DELETE_PLANNING_MODE, deleteMode.modeName()) + .commit(); + SparkReadConf readConf = new SparkReadConf(spark, table, ImmutableMap.of()); + return new SparkDistributedDataScan(spark, table, readConf); + } +} diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java index f65da4574284..9075257fa9f1 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestMetadataTableReadableMetrics.java @@ -43,6 +43,7 @@ import org.apache.iceberg.spark.SparkCatalogConfig; import org.apache.iceberg.spark.SparkTestBaseWithCatalog; import org.apache.iceberg.types.Types; +import org.apache.iceberg.util.Pair; import org.apache.spark.sql.Dataset; import org.apache.spark.sql.Row; import org.junit.After; @@ -129,7 +130,7 @@ private Table createPrimitiveTable() throws IOException { return table; } - private void createNestedTable() throws IOException { + private Pair createNestedTable() throws IOException { Table table = catalog.createTable( TableIdentifier.of(Namespace.of(database()), tableName()), @@ -145,6 +146,7 @@ private void createNestedTable() throws IOException { DataFile dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), records); table.newAppend().appendFile(dataFile).commit(); + return Pair.of(table, dataFile); } @After @@ -351,10 +353,15 @@ public void testSelectNestedValues() throws Exception { 
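For context on the hunk that follows: the expected readable_metrics row no longer hard-codes the column sizes 53L and 54L, because per-column on-disk sizes vary with file format and compression; instead the test looks them up from the DataFile it just wrote. A minimal sketch of that lookup using standard Iceberg APIs (Schema.findField, DataFile.columnSizes); the helper class itself is illustrative only:

import org.apache.iceberg.DataFile;
import org.apache.iceberg.Table;

final class ColumnSizeLookup {
  private ColumnSizeLookup() {}

  // Returns the on-disk size in bytes recorded for the given column path in one data file,
  // e.g. columnSizeBytes(table, dataFile, "nestedStructCol.leafStructCol.leafLongCol").
  static Long columnSizeBytes(Table table, DataFile dataFile, String columnPath) {
    int fieldId = table.schema().findField(columnPath).fieldId();
    return dataFile.columnSizes().get(fieldId);
  }
}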
@Test public void testNestedValues() throws Exception { - createNestedTable(); - - Object[] leafDoubleCol = row(53L, 3L, 1L, 1L, 0.0D, 0.0D); - Object[] leafLongCol = row(54L, 3L, 1L, null, 0L, 1L); + Pair table = createNestedTable(); + int longColId = + table.first().schema().findField("nestedStructCol.leafStructCol.leafLongCol").fieldId(); + int doubleColId = + table.first().schema().findField("nestedStructCol.leafStructCol.leafDoubleCol").fieldId(); + + Object[] leafDoubleCol = + row(table.second().columnSizes().get(doubleColId), 3L, 1L, 1L, 0.0D, 0.0D); + Object[] leafLongCol = row(table.second().columnSizes().get(longColId), 3L, 1L, null, 0L, 1L); Object[] metrics = row(leafDoubleCol, leafLongCol); List expected = ImmutableList.of(new Object[] {metrics}); diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java index cadcbad6aa76..d5ea85b335ab 100644 --- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java +++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReaderDeletes.java @@ -38,6 +38,7 @@ import org.apache.iceberg.Files; import org.apache.iceberg.MetadataColumns; import org.apache.iceberg.PartitionSpec; +import org.apache.iceberg.PlanningMode; import org.apache.iceberg.Schema; import org.apache.iceberg.Table; import org.apache.iceberg.TableMetadata; @@ -97,19 +98,21 @@ public class TestSparkReaderDeletes extends DeleteReadTests { protected static HiveCatalog catalog = null; private final String format; private final boolean vectorized; + private final PlanningMode planningMode; - public TestSparkReaderDeletes(String format, boolean vectorized) { + public TestSparkReaderDeletes(String format, boolean vectorized, PlanningMode planningMode) { this.format = format; this.vectorized = vectorized; + this.planningMode = planningMode; } - @Parameterized.Parameters(name = "format = {0}, vectorized = {1}") + @Parameterized.Parameters(name = "format = {0}, vectorized = {1}, planningMode = {2}") public static Object[][] parameters() { return new Object[][] { - new Object[] {"parquet", false}, - new Object[] {"parquet", true}, - new Object[] {"orc", false}, - new Object[] {"avro", false} + new Object[] {"parquet", false, PlanningMode.DISTRIBUTED}, + new Object[] {"parquet", true, PlanningMode.LOCAL}, + new Object[] {"orc", false, PlanningMode.DISTRIBUTED}, + new Object[] {"avro", false, PlanningMode.LOCAL} }; } @@ -162,7 +165,12 @@ protected Table createTable(String name, Schema schema, PartitionSpec spec) { TableOperations ops = ((BaseTable) table).operations(); TableMetadata meta = ops.current(); ops.commit(meta, meta.upgradeToFormatVersion(2)); - table.updateProperties().set(TableProperties.DEFAULT_FILE_FORMAT, format).commit(); + table + .updateProperties() + .set(TableProperties.DEFAULT_FILE_FORMAT, format) + .set(TableProperties.DATA_PLANNING_MODE, planningMode.modeName()) + .set(TableProperties.DELETE_PLANNING_MODE, planningMode.modeName()) + .commit(); if (format.equals("parquet") || format.equals("orc")) { String vectorizationEnabled = format.equals("parquet")
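The v3.4 and v3.5 SparkDistributedDataScan changes above do two things a consumer can observe: they count the data and delete files that distributed planning skipped (live files in the manifests minus the files returned by the executors), and, when the scan is built from a BaseTable, they forward the table's MetricsReporter so those counters reach a ScanReport. A minimal sketch of a reporter that logs the new counters; the class is illustrative, and wiring it in through the metrics-reporter-impl catalog property is an assumption about deployment rather than part of this patch:

import org.apache.iceberg.metrics.MetricsReport;
import org.apache.iceberg.metrics.MetricsReporter;
import org.apache.iceberg.metrics.ScanReport;

public class ScanSkipLoggingReporter implements MetricsReporter {
  @Override
  public void report(MetricsReport report) {
    if (report instanceof ScanReport) {
      ScanReport scanReport = (ScanReport) report;
      // skippedDataFiles/skippedDeleteFiles are CounterResult values and may be null
      // if the scan did not record them.
      System.out.printf(
          "table=%s skippedDataFiles=%s skippedDeleteFiles=%s%n",
          scanReport.tableName(),
          scanReport.scanMetrics().skippedDataFiles(),
          scanReport.scanMetrics().skippedDeleteFiles());
    }
  }
}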