Skip to content

Commit

Permalink
GC: always look into all keys of the last commit (#9400)
Browse files Browse the repository at this point in the history
There is a chance that GC might not recognize all live contents if the commit history is shorter than the cutoff policy _and_ (!) commits are not accessible (database corruption).

No realistic functional or behavioral change; rather, a precaution.
  • Loading branch information
snazy authored Aug 26, 2024
1 parent 0538ff1 commit 5defbc6
Showing 1 changed file with 25 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
*/
package org.projectnessie.gc.identify;

import static java.lang.String.format;

import com.google.common.base.Preconditions;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import jakarta.annotation.Nullable;
Expand Down Expand Up @@ -241,6 +243,7 @@ private ReferencesWalkResult identifyContentsForReference(

LogEntryHolder holder = new LogEntryHolder();
String lastCommitId = null;
String finalCommitId = null;

for (Spliterator<LogResponse.LogEntry> spliterator = commits.spliterator();
spliterator.tryAdvance(holder::set); ) {
Expand Down Expand Up @@ -308,33 +311,34 @@ private ReferencesWalkResult identifyContentsForReference(
}));
} else {
// 1st non-live commit
try {
numContents += collectAllKeys(addContents, Detached.of(lastCommitId));
} catch (NessieNotFoundException e) {
throw new RuntimeException(e);
}
LOGGER.info(
"live-set#{}: Finished walking the commit log of {} using {} after {} commits, "
+ "commit {} is the first non-live commit.",
addContents.id(),
namedReference,
cutoffPolicy,
numCommits,
commitHash);
return ReferencesWalkResult.single(numCommits, numContents);
finalCommitId = commitHash;
break;
}
}

// Always consider all content reachable from the last live commit.
if (lastCommitId != null) {
try {
numContents += collectAllKeys(addContents, Detached.of(lastCommitId));
} catch (NessieNotFoundException e) {
throw new RuntimeException(e);
}
}

LOGGER.info(
"live-set#{}: Finished walking the commit log of {} using {} after {} commits, {}",
addContents.id(),
namedReference,
cutoffPolicy,
numCommits,
finalCommitId != null
? format("commit %s is the first non-live commit.", finalCommitId)
: "no more commits");
return ReferencesWalkResult.single(numCommits, numContents);
} catch (NessieNotFoundException e) {
throw new RuntimeException(
"GC-run#" + addContents.id() + ": Could not find reference " + namedReference, e);
}
LOGGER.info(
"live-set#{}: Finished walking the commit log of {} using {} after {} commits, no more commits.",
addContents.id(),
namedReference,
cutoffPolicy,
numCommits);
return ReferencesWalkResult.single(numCommits, numContents);
}

@SuppressWarnings("resource")
Expand Down

0 comments on commit 5defbc6

Please sign in to comment.