Skip to content

Commit

Permalink
Cache: keep (deserialized) object around (#9648)
Browse files Browse the repository at this point in the history
Refactor the cached-key-value to keep the deserialized object around leveraging a `java.lang.ref.SoftReference` to it. While this increases the overall heap pressure for each cached entity, the referenced objects are eligible for garbage collection, if Java GC decides to do so. This change continues to keep the serialized representation around (using a strong reference) - a "simple" `byte[]` is still way less individual Java objects compared to an "exploded" `Obj`, so less effort for GC.

This change should help reduce the deserialization effort, especially for very frequently accessed objects, despite the semantics of `SoftReference`.
  • Loading branch information
snazy authored Sep 30, 2024
1 parent 3bac82b commit 89faab9
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 30 deletions.
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ as necessary. Empty sections will not end in the release notes.

### Changes

- The persistence cache tries to avoid deserialization overhead when getting an object from the
cache by using Java's `SoftReference`. There is no guarantee that cached objects keep their
Java object tree around, but it should remain available for the majority of accesses to frequently
accessed cached objects. The default cache capacity fraction has been reduced from 70% of the
heap size to 60% of the heap size. However, extreme heap pressure may let Java GC clear all
`SoftReference`s.

### Deprecations

### Fixes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,7 +135,7 @@ public interface QuarkusStoreConfig extends StoreConfig {
/**
* Fraction of Java’s max heap size to use for cache objects, set to 0 to disable. Must not be
* used with fixed cache sizing. If neither this value nor a fixed size is configured, a default
* of .7 (70%) is assumed.
* of {@code .6} (60%) is assumed.
*/
@WithName(CONFIG_CACHE_CAPACITY_FRACTION_OF_HEAP)
OptionalDouble cacheCapacityFractionOfHeap();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ public interface CacheSizing {

int DEFAULT_HEAP_SIZE_KEEP_FREE = 256;
int DEFAULT_MIN_SIZE_MB = 64;
double DEFAULT_HEAP_FRACTION = 0.7d;
double DEFAULT_HEAP_FRACTION = 0.6d;

OptionalInt fixedSizeInMB();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.binder.cache.CaffeineStatsCounter;
import jakarta.annotation.Nonnull;
import java.lang.ref.SoftReference;
import java.time.Duration;
import org.checkerframework.checker.index.qual.NonNegative;
import org.projectnessie.versioned.storage.common.exceptions.ObjTooLargeException;
Expand All @@ -44,10 +45,11 @@
class CaffeineCacheBackend implements CacheBackend {

public static final String CACHE_NAME = "nessie-objects";
private static final byte[] NON_EXISTING_SENTINEL = "NON_EXISTING".getBytes(UTF_8);
private static final CacheKeyValue NON_EXISTING_SENTINEL =
new CacheKeyValue("x", ObjId.EMPTY_OBJ_ID, 0L, new byte[0], null);

private final CacheConfig config;
final Cache<CacheKeyValue, byte[]> cache;
final Cache<CacheKeyValue, CacheKeyValue> cache;

private final long refCacheTtlNanos;
private final long refCacheNegativeTtlNanos;
Expand All @@ -58,15 +60,15 @@ class CaffeineCacheBackend implements CacheBackend {
refCacheTtlNanos = config.referenceTtl().orElse(Duration.ZERO).toNanos();
refCacheNegativeTtlNanos = config.referenceNegativeTtl().orElse(Duration.ZERO).toNanos();

Caffeine<CacheKeyValue, byte[]> cacheBuilder =
Caffeine<CacheKeyValue, CacheKeyValue> cacheBuilder =
Caffeine.newBuilder()
.maximumWeight(config.capacityMb() * 1024L * 1024L)
.weigher(this::weigher)
.expireAfter(
new Expiry<CacheKeyValue, byte[]>() {
new Expiry<CacheKeyValue, CacheKeyValue>() {
@Override
public long expireAfterCreate(
CacheKeyValue key, byte[] value, long currentTimeNanos) {
CacheKeyValue key, CacheKeyValue value, long currentTimeNanos) {
long expire = key.expiresAtNanosEpoch;
if (expire == CACHE_UNLIMITED) {
return Long.MAX_VALUE;
Expand All @@ -81,7 +83,7 @@ public long expireAfterCreate(
@Override
public long expireAfterUpdate(
CacheKeyValue key,
byte[] value,
CacheKeyValue value,
long currentTimeNanos,
@NonNegative long currentDurationNanos) {
return expireAfterCreate(key, value, currentTimeNanos);
Expand All @@ -90,7 +92,7 @@ public long expireAfterUpdate(
@Override
public long expireAfterRead(
CacheKeyValue key,
byte[] value,
CacheKeyValue value,
long currentTimeNanos,
@NonNegative long currentDurationNanos) {
return currentDurationNanos;
Expand Down Expand Up @@ -118,26 +120,23 @@ public Persist wrap(@Nonnull Persist persist) {
return new CachingPersistImpl(persist, cache);
}

private int weigher(CacheKeyValue key, byte[] value) {
private int weigher(CacheKeyValue key, CacheKeyValue value) {
int size = key.heapSize();
if (value != null) {
size += ARRAY_OVERHEAD + value.length;
}
size += CAFFEINE_OBJ_OVERHEAD;
return size;
}

@Override
public Obj get(@Nonnull String repositoryId, @Nonnull ObjId id) {
CacheKeyValue key = cacheKey(repositoryId, id);
byte[] value = cache.getIfPresent(key);
CacheKeyValue value = cache.getIfPresent(key);
if (value == null) {
return null;
}
if (value == NON_EXISTING_SENTINEL) {
return NOT_FOUND_OBJ_SENTINEL;
}
return ProtoSerialization.deserializeObj(id, 0L, value, null);
return value.getObj();
}

@Override
Expand All @@ -159,8 +158,9 @@ public void putLocal(@Nonnull String repositoryId, @Nonnull Obj obj) {
byte[] serialized = serializeObj(obj, Integer.MAX_VALUE, Integer.MAX_VALUE, true);
long expiresAtNanos =
expiresAt == CACHE_UNLIMITED ? CACHE_UNLIMITED : MICROSECONDS.toNanos(expiresAt);
CacheKeyValue keyValue = cacheKeyValue(repositoryId, obj.id(), expiresAtNanos);
cache.put(keyValue, serialized);
CacheKeyValue keyValue =
cacheKeyValue(repositoryId, obj.id(), expiresAtNanos, serialized, obj);
cache.put(keyValue, keyValue);
} catch (ObjTooLargeException e) {
// this should never happen
throw new RuntimeException(e);
Expand Down Expand Up @@ -220,9 +220,14 @@ public void putReferenceLocal(@Nonnull String repositoryId, @Nonnull Reference r
return;
}
ObjId id = refObjId(r.name());
CacheKeyValue key =
cacheKeyValue(repositoryId, id, config.clockNanos().getAsLong() + refCacheTtlNanos);
cache.put(key, serializeReference(r));
CacheKeyValue keyValue =
cacheKeyValue(
repositoryId,
id,
config.clockNanos().getAsLong() + refCacheTtlNanos,
serializeReference(r),
r);
cache.put(keyValue, keyValue);
}

@Override
Expand All @@ -242,12 +247,14 @@ public Reference getReference(@Nonnull String repositoryId, @Nonnull String name
return null;
}
ObjId id = refObjId(name);
CacheKeyValue keyValue = cacheKey(repositoryId, id);
byte[] bytes = cache.getIfPresent(keyValue);
if (bytes == NON_EXISTING_SENTINEL) {
CacheKeyValue value = cache.getIfPresent(cacheKey(repositoryId, id));
if (value == null) {
return null;
}
if (value == NON_EXISTING_SENTINEL) {
return NON_EXISTENT_REFERENCE_SENTINEL;
}
return bytes != null ? deserializeReference(bytes) : null;
return value.getReference();
}

static CacheKeyValue cacheKey(String repositoryId, ObjId id) {
Expand All @@ -259,6 +266,11 @@ private static CacheKeyValue cacheKeyValue(
return new CacheKeyValue(repositoryId, id, expiresAtNanosEpoch);
}

/**
 * Builds a combined cache key/value instance that carries the serialized payload together with
 * the already materialized object.
 *
 * @param repositoryId repository the entry belongs to
 * @param id object id of the entry
 * @param expiresAtNanosEpoch expiration timestamp handed to the entry as-is
 * @param serialized serialized representation of {@code object}
 * @param object the materialized object passed on to the {@link CacheKeyValue} constructor
 * @return a new {@link CacheKeyValue} holding all given values
 */
private static CacheKeyValue cacheKeyValue(
    String repositoryId, ObjId id, long expiresAtNanosEpoch, byte[] serialized, Object object) {
  CacheKeyValue entry =
      new CacheKeyValue(repositoryId, id, expiresAtNanosEpoch, serialized, object);
  return entry;
}

/**
* Class used for both the cache key and cache value including the expiration timestamp. This is
* (should be) more efficient (think: mono-morphic vs bi-morphic call sites) and more GC/heap
Expand All @@ -273,20 +285,35 @@ static final class CacheKeyValue {
// Revisit this field before 2262-04-11T23:47:16.854Z (64-bit signed long overflow) ;) ;)
final long expiresAtNanosEpoch;

final byte[] serialized;
java.lang.ref.Reference<Object> object;

CacheKeyValue(String repositoryId, ObjId id) {
this(repositoryId, id, 0L);
}

CacheKeyValue(String repositoryId, ObjId id, long expiresAtNanosEpoch) {
this(repositoryId, id, expiresAtNanosEpoch, null, null);
}

/**
 * Creates an instance carrying the key attributes plus the value attributes.
 *
 * @param repositoryId repository the entry belongs to
 * @param id object id of the entry
 * @param expiresAtNanosEpoch expiration timestamp stored with the entry
 * @param serialized serialized representation, kept via a strong reference; may be {@code null}
 * @param object materialized object, only held via a {@link SoftReference}; may be {@code null}
 */
CacheKeyValue(
    String repositoryId, ObjId id, long expiresAtNanosEpoch, byte[] serialized, Object object) {
  // value part: strong reference to the bytes, soft reference to the object
  this.serialized = serialized;
  this.object = new SoftReference<>(object, null);
  // key part
  this.expiresAtNanosEpoch = expiresAtNanosEpoch;
  this.id = id;
  this.repositoryId = repositoryId;
}

/**
 * Estimates the heap footprint of this instance.
 *
 * <p>The estimate is the sum of the fixed object size, the repository id string, the object id,
 * the serialized payload (if present) and the fixed overhead accounted for the soft reference.
 *
 * @return the estimated size
 */
int heapSize() {
  // key-only instances carry no serialized payload
  int payloadSize = serialized == null ? 0 : ARRAY_OVERHEAD + serialized.length;
  int repositoryIdSize = STRING_OBJ_OVERHEAD + repositoryId.length();
  return OBJ_SIZE + repositoryIdSize + id.heapSize() + payloadSize + SOFT_REFERENCE_OVERHEAD;
}

Expand All @@ -311,6 +338,26 @@ public int hashCode() {
public String toString() {
return "{" + repositoryId + ", " + id + '}';
}

/**
 * Returns the {@link Obj} for this entry.
 *
 * <p>Uses the softly referenced instance when it is still present, otherwise deserializes the
 * object from {@code serialized} and installs a fresh soft reference to the result.
 *
 * @return the softly referenced or freshly deserialized object
 */
Obj getObj() {
  Object cached = this.object.get();
  if (cached != null) {
    return (Obj) cached;
  }
  Obj deserialized = ProtoSerialization.deserializeObj(id, 0L, this.serialized, null);
  // plain field write on purpose - JMM side effects are not a concern here
  this.object = new SoftReference<>(deserialized);
  return deserialized;
}

/**
 * Returns the {@link Reference} for this entry.
 *
 * <p>Uses the softly referenced instance when it is still present, otherwise deserializes the
 * reference from {@code serialized} and installs a fresh soft reference to the result.
 *
 * @return the softly referenced or freshly deserialized reference
 */
Reference getReference() {
  Object cached = this.object.get();
  if (cached != null) {
    return (Reference) cached;
  }
  Reference deserialized = deserializeReference(this.serialized);
  // plain field write on purpose - JMM side effects are not a concern here
  this.object = new SoftReference<>(deserialized);
  return deserialized;
}
}

/*
Expand All @@ -321,15 +368,18 @@ public String toString() {
12 4 java.lang.String CacheKeyValue.repositoryId null
16 8 long CacheKeyValue.expiresAt 0
24 4 org.projectnessie.versioned.storage.common.persist.ObjId CacheKeyValue.id null
28 4 (object alignment gap)
Instance size: 32 bytes
28 4 byte[] CacheKeyValue.serialized null
32 4 java.lang.ref.Reference CacheKeyValue.object null
36 4 (object alignment gap)
Instance size: 40 bytes
Space losses: 0 bytes internal + 4 bytes external = 4 bytes total
*/
static final int OBJ_SIZE = 32;
static final int OBJ_SIZE = 40;
/*
Array overhead: 16 bytes
*/
static final int ARRAY_OVERHEAD = 16;
static final int SOFT_REFERENCE_OVERHEAD = 32;
/*
java.lang.String object internals:
OFF SZ TYPE DESCRIPTION VALUE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ public void cachingObjectsExpiration() {
backend.put("repo", dynamicCachingObj);
backend.put("repo", stdObj);

ConcurrentMap<CaffeineCacheBackend.CacheKeyValue, byte[]> cacheMap = backend.cache.asMap();
ConcurrentMap<CaffeineCacheBackend.CacheKeyValue, CaffeineCacheBackend.CacheKeyValue> cacheMap =
backend.cache.asMap();

soft.assertThat(cacheMap)
.doesNotContainKey(CaffeineCacheBackend.cacheKey("repo", nonCachingObj.id()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,14 +82,14 @@ void tinyHeapNoCache() {
void defaultSettings4G() {
// Assuming a 4G max heap, requesting the default 60%, sizing must yield 2457MB.
soft.assertThat(CacheSizing.builder().build().calculateEffectiveSizeInMB(BYTES_4G))
.isEqualTo(2867);
.isEqualTo(2457);
}

@Test
void defaultSettings1G() {
soft.assertThat(CacheSizing.builder().build().calculateEffectiveSizeInMB(BYTES_1G))
// 60 % of 1024 MB
.isEqualTo(716);
.isEqualTo(614);
}

@Test
Expand Down

0 comments on commit 89faab9

Please sign in to comment.