Skip to content

Commit

Permalink
[1.4.x] Core: Expired Snapshot files in a transaction should be delet…
Browse files Browse the repository at this point in the history
…ed (#9223)

* Core: Expired Snapshot files in a transaction should be deleted. (#9183)

When a snapshot is expired as part of a transaction, the manifest list
file(s) should be deleted when the transaction commits. A recent change
(#6634) ensured that these files are not deleted when they have also
been committed as part of a transaction, but this breaks the simple
case where no new files are committed. Fix this by not skipping
deletion when the list of committed files is empty.

* Core: Fix logic for determining set of committed files in BaseTransaction when there are no new snapshots (#9221)

---------

Co-authored-by: Amogh Jahagirdar <[email protected]>
  • Loading branch information
bartash and amogh-jahagirdar authored Dec 19, 2023
1 parent 793f815 commit 63e0204
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 2 deletions.
5 changes: 4 additions & 1 deletion core/src/main/java/org/apache/iceberg/BaseTransaction.java
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import org.apache.iceberg.metrics.MetricsReporter;
import org.apache.iceberg.relocated.com.google.common.annotations.VisibleForTesting;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.util.PropertyUtil;
Expand Down Expand Up @@ -505,9 +506,11 @@ private void applyUpdates(TableOperations underlyingOps) {
}
}

// committedFiles returns null whenever the set of committed files
// cannot be determined from the provided snapshots
private static Set<String> committedFiles(TableOperations ops, Set<Long> snapshotIds) {
if (snapshotIds.isEmpty()) {
return null;
return ImmutableSet.of();
}

Set<String> committedFiles = Sets.newHashSet();
Expand Down
9 changes: 9 additions & 0 deletions core/src/test/java/org/apache/iceberg/TableTestBase.java
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,15 @@ List<File> listManifestFiles(File tableDirToList) {
&& Files.getFileExtension(name).equalsIgnoreCase("avro")));
}

List<File> listManifestLists(String tableDirToList) {
return Lists.newArrayList(
new File(tableDirToList, "metadata")
.listFiles(
(dir, name) ->
name.startsWith("snap")
&& Files.getFileExtension(name).equalsIgnoreCase("avro")));
}

public static long countAllMetadataFiles(File tableDir) {
return Arrays.stream(new File(tableDir, "metadata").listFiles())
.filter(f -> f.isFile())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,10 @@ public void testRetainNAvailableSnapshotsWithTransaction() {
t3 = System.currentTimeMillis();
}

// Retain last 2 snapshots
Assert.assertEquals(
"Should be 3 manifest lists", 3, listManifestLists(table.location()).size());

// Retain last 2 snapshots, which means 1 is deleted.
Transaction tx = table.newTransaction();
removeSnapshots(tx.table()).expireOlderThan(t3).retainLast(2).commit();
tx.commitTransaction();
Expand All @@ -449,6 +452,8 @@ public void testRetainNAvailableSnapshotsWithTransaction() {
"Should have two snapshots.", 2, Lists.newArrayList(table.snapshots()).size());
Assert.assertEquals(
"First snapshot should not present.", null, table.snapshot(firstSnapshotId));
Assert.assertEquals(
"Should be 2 manifest lists", 2, listManifestLists(table.location()).size());
}

@Test
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,8 @@ public void testExpirationInTransaction() {
V2Assert.assertEquals("Snapshot sequence number should be 1", 1, snap1.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 1", 1, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 1 manifest list", 1, listManifestLists(table.location()).size());

table.newAppend().appendFile(FILE_B).commit();
Snapshot snap2 = table.currentSnapshot();
Expand All @@ -319,12 +321,18 @@ public void testExpirationInTransaction() {
V2Assert.assertEquals("Snapshot sequence number should be 2", 2, snap2.sequenceNumber());
V2Assert.assertEquals(
"Last sequence number should be 2", 2, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 2 manifest lists", 2, listManifestLists(table.location()).size());

Transaction txn = table.newTransaction();
txn.expireSnapshots().expireSnapshotId(commitId1).commit();
txn.commitTransaction();
V2Assert.assertEquals(
"Last sequence number should be 2", 2, readMetadata().lastSequenceNumber());
V2Assert.assertEquals(
"Should be 1 manifest list as 1 was deleted",
1,
listManifestLists(table.location()).size());
}

@Test
Expand Down

0 comments on commit 63e0204

Please sign in to comment.