Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[1.4.x] Core: Fix missing files from transaction retries with conflicting manifest merges (#9230) #9337

Merged
merged 1 commit into from
Dec 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions core/src/main/java/org/apache/iceberg/FastAppend.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,16 +179,16 @@ public Object updateEvent() {
@Override
protected void cleanUncommitted(Set<ManifestFile> committed) {
if (newManifests != null) {
List<ManifestFile> committedNewManifests = Lists.newArrayList();
boolean hasDeletes = false;
for (ManifestFile manifest : newManifests) {
if (committed.contains(manifest)) {
committedNewManifests.add(manifest);
} else {
if (!committed.contains(manifest)) {
deleteFile(manifest.path());
hasDeletes = true;
}
}

this.newManifests = committedNewManifests;
if (hasDeletes) {
this.newManifests = null;
}
}

// clean up only rewrittenAppendManifests as they are always owned by the table
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -886,16 +886,17 @@ public Object updateEvent() {

private void cleanUncommittedAppends(Set<ManifestFile> committed) {
if (cachedNewDataManifests != null) {
List<ManifestFile> committedNewDataManifests = Lists.newArrayList();
boolean hasDeletes = false;
for (ManifestFile manifest : cachedNewDataManifests) {
if (committed.contains(manifest)) {
committedNewDataManifests.add(manifest);
} else {
if (!committed.contains(manifest)) {
deleteFile(manifest.path());
hasDeletes = true;
}
}

this.cachedNewDataManifests = committedNewDataManifests;
if (hasDeletes) {
this.cachedNewDataManifests = null;
}
}

ListIterator<ManifestFile> deleteManifestsIterator = cachedNewDeleteManifests.listIterator();
Expand Down
47 changes: 47 additions & 0 deletions core/src/test/java/org/apache/iceberg/TestTransaction.java
Original file line number Diff line number Diff line change
Expand Up @@ -760,4 +760,51 @@ public void testSimpleTransactionNotDeletingMetadataOnUnknownSate() throws IOExc
Assert.assertTrue("Manifest file should exist", new File(manifests.get(0).path()).exists());
Assert.assertEquals("Should have 2 files in metadata", 2, countAllMetadataFiles(tableDir));
}

@Test
public void testTransactionRecommit() {
// update table settings to merge when there are 3 manifests
table.updateProperties().set(TableProperties.MANIFEST_MIN_MERGE_COUNT, "3").commit();

// create manifests so that the next commit will trigger a merge
table.newFastAppend().appendFile(FILE_A).commit();
table.newFastAppend().appendFile(FILE_B).commit();

// start a transaction with appended files that will merge
Transaction transaction = Transactions.newTransaction(table.name(), table.ops());

AppendFiles append = transaction.newAppend().appendFile(FILE_D);
Snapshot pending = append.apply();

Assert.assertEquals(
"Should produce 1 pending merged manifest", 1, pending.allManifests(table.io()).size());

// because a merge happened, the appended manifest is deleted the by append operation
append.commit();

// concurrently commit FILE_A without a transaction to cause the previous append to retry
table.newAppend().appendFile(FILE_C).commit();
Assert.assertEquals(
"Should produce 1 committed merged manifest",
1,
table.currentSnapshot().allManifests(table.io()).size());

transaction.commitTransaction();

Set<String> paths =
Sets.newHashSet(
Iterables.transform(
table.newScan().planFiles(), task -> task.file().path().toString()));
Set<String> expectedPaths =
Sets.newHashSet(
FILE_A.path().toString(),
FILE_B.path().toString(),
FILE_C.path().toString(),
FILE_D.path().toString());

Assert.assertEquals("Should contain all committed files", expectedPaths, paths);

Assert.assertEquals(
"Should produce 2 manifests", 2, table.currentSnapshot().allManifests(table.io()).size());
}
}