Skip to content

Commit

Permalink
Cache: keep (deserialized) object around
Browse files Browse the repository at this point in the history
Refactor the cached key-value to keep the deserialized object around, leveraging a `java.lang.ref.SoftReference` to it. While this increases the overall heap pressure for each cached entity, the referenced objects remain eligible for garbage collection whenever the Java GC decides to reclaim them. This change continues to keep the serialized representation around (using a strong reference) - a "simple" `byte[]` is still far fewer individual Java objects compared to an "exploded" `Obj`, so less effort for GC.

This change should help reduce the deserialization effort, especially for very frequently accessed objects, despite the semantics of `SoftReference`.
  • Loading branch information
snazy committed Sep 26, 2024
1 parent 21b4537 commit 8307a13
Show file tree
Hide file tree
Showing 2 changed files with 77 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.binder.cache.CaffeineStatsCounter;
import jakarta.annotation.Nonnull;
import java.lang.ref.SoftReference;
import java.time.Duration;
import org.checkerframework.checker.index.qual.NonNegative;
import org.projectnessie.versioned.storage.common.exceptions.ObjTooLargeException;
Expand All @@ -44,10 +45,11 @@
class CaffeineCacheBackend implements CacheBackend {

public static final String CACHE_NAME = "nessie-objects";
private static final byte[] NON_EXISTING_SENTINEL = "NON_EXISTING".getBytes(UTF_8);
private static final CacheKeyValue NON_EXISTING_SENTINEL =
new CacheKeyValue("x", ObjId.EMPTY_OBJ_ID, 0L, new byte[0], null);

private final CacheConfig config;
final Cache<CacheKeyValue, byte[]> cache;
final Cache<CacheKeyValue, CacheKeyValue> cache;

private final long refCacheTtlNanos;
private final long refCacheNegativeTtlNanos;
Expand All @@ -58,15 +60,15 @@ class CaffeineCacheBackend implements CacheBackend {
refCacheTtlNanos = config.referenceTtl().orElse(Duration.ZERO).toNanos();
refCacheNegativeTtlNanos = config.referenceNegativeTtl().orElse(Duration.ZERO).toNanos();

Caffeine<CacheKeyValue, byte[]> cacheBuilder =
Caffeine<CacheKeyValue, CacheKeyValue> cacheBuilder =
Caffeine.newBuilder()
.maximumWeight(config.capacityMb() * 1024L * 1024L)
.weigher(this::weigher)
.expireAfter(
new Expiry<CacheKeyValue, byte[]>() {
new Expiry<CacheKeyValue, CacheKeyValue>() {
@Override
public long expireAfterCreate(
CacheKeyValue key, byte[] value, long currentTimeNanos) {
CacheKeyValue key, CacheKeyValue value, long currentTimeNanos) {
long expire = key.expiresAtNanosEpoch;
if (expire == CACHE_UNLIMITED) {
return Long.MAX_VALUE;
Expand All @@ -81,7 +83,7 @@ public long expireAfterCreate(
@Override
public long expireAfterUpdate(
CacheKeyValue key,
byte[] value,
CacheKeyValue value,
long currentTimeNanos,
@NonNegative long currentDurationNanos) {
return expireAfterCreate(key, value, currentTimeNanos);
Expand All @@ -90,7 +92,7 @@ public long expireAfterUpdate(
@Override
public long expireAfterRead(
CacheKeyValue key,
byte[] value,
CacheKeyValue value,
long currentTimeNanos,
@NonNegative long currentDurationNanos) {
return currentDurationNanos;
Expand Down Expand Up @@ -118,26 +120,23 @@ public Persist wrap(@Nonnull Persist persist) {
return new CachingPersistImpl(persist, cache);
}

private int weigher(CacheKeyValue key, byte[] value) {
private int weigher(CacheKeyValue key, CacheKeyValue value) {
int size = key.heapSize();
if (value != null) {
size += ARRAY_OVERHEAD + value.length;
}
size += CAFFEINE_OBJ_OVERHEAD;
return size;
}

@Override
public Obj get(@Nonnull String repositoryId, @Nonnull ObjId id) {
CacheKeyValue key = cacheKey(repositoryId, id);
byte[] value = cache.getIfPresent(key);
CacheKeyValue value = cache.getIfPresent(key);
if (value == null) {
return null;
}
if (value == NON_EXISTING_SENTINEL) {
return NOT_FOUND_OBJ_SENTINEL;
}
return ProtoSerialization.deserializeObj(id, 0L, value, null);
return value.getObj();
}

@Override
Expand All @@ -159,8 +158,9 @@ public void putLocal(@Nonnull String repositoryId, @Nonnull Obj obj) {
byte[] serialized = serializeObj(obj, Integer.MAX_VALUE, Integer.MAX_VALUE, true);
long expiresAtNanos =
expiresAt == CACHE_UNLIMITED ? CACHE_UNLIMITED : MICROSECONDS.toNanos(expiresAt);
CacheKeyValue keyValue = cacheKeyValue(repositoryId, obj.id(), expiresAtNanos);
cache.put(keyValue, serialized);
CacheKeyValue keyValue =
cacheKeyValue(repositoryId, obj.id(), expiresAtNanos, serialized, obj);
cache.put(keyValue, keyValue);
} catch (ObjTooLargeException e) {
// this should never happen
throw new RuntimeException(e);
Expand Down Expand Up @@ -220,9 +220,14 @@ public void putReferenceLocal(@Nonnull String repositoryId, @Nonnull Reference r
return;
}
ObjId id = refObjId(r.name());
CacheKeyValue key =
cacheKeyValue(repositoryId, id, config.clockNanos().getAsLong() + refCacheTtlNanos);
cache.put(key, serializeReference(r));
CacheKeyValue keyValue =
cacheKeyValue(
repositoryId,
id,
config.clockNanos().getAsLong() + refCacheTtlNanos,
serializeReference(r),
r);
cache.put(keyValue, keyValue);
}

@Override
Expand All @@ -242,12 +247,14 @@ public Reference getReference(@Nonnull String repositoryId, @Nonnull String name
return null;
}
ObjId id = refObjId(name);
CacheKeyValue keyValue = cacheKey(repositoryId, id);
byte[] bytes = cache.getIfPresent(keyValue);
if (bytes == NON_EXISTING_SENTINEL) {
CacheKeyValue value = cache.getIfPresent(cacheKey(repositoryId, id));
if (value == null) {
return null;
}
if (value == NON_EXISTING_SENTINEL) {
return NON_EXISTENT_REFERENCE_SENTINEL;
}
return bytes != null ? deserializeReference(bytes) : null;
return value.getReference();
}

static CacheKeyValue cacheKey(String repositoryId, ObjId id) {
Expand All @@ -259,6 +266,11 @@ private static CacheKeyValue cacheKeyValue(
return new CacheKeyValue(repositoryId, id, expiresAtNanosEpoch);
}

/**
 * Builds a {@link CacheKeyValue} that carries the serialized payload and the object it was
 * serialized from, in addition to the repository ID, object ID and expiration timestamp.
 *
 * <p>All arguments are handed unchanged to the five-argument {@link CacheKeyValue} constructor.
 *
 * @param repositoryId repository ID of the entry
 * @param id object ID of the entry
 * @param expiresAtNanosEpoch expiration timestamp of the entry
 * @param serialized serialized representation to store in the entry
 * @param object object instance to store alongside the serialized representation
 * @return the newly created instance
 */
private static CacheKeyValue cacheKeyValue(
    String repositoryId, ObjId id, long expiresAtNanosEpoch, byte[] serialized, Object object) {
  CacheKeyValue entry =
      new CacheKeyValue(repositoryId, id, expiresAtNanosEpoch, serialized, object);
  return entry;
}

/**
* Class used for both the cache key and cache value including the expiration timestamp. This is
* (should be) more efficient (think: mono-morphic vs bi-morphic call sites) and more GC/heap
Expand All @@ -273,20 +285,35 @@ static final class CacheKeyValue {
// Revisit this field before 2262-04-11T23:47:16.854Z (64-bit signed long overflow) ;) ;)
final long expiresAtNanosEpoch;

final byte[] serialized;
java.lang.ref.Reference<Object> object;

/**
 * Creates an instance from a repository ID and an object ID only, delegating to the
 * three-argument constructor with an {@code expiresAtNanosEpoch} of {@code 0L}.
 */
CacheKeyValue(String repositoryId, ObjId id) {
this(repositoryId, id, 0L);
}

/**
 * Creates an instance that carries no payload: delegates to the five-argument constructor,
 * passing {@code null} for both the serialized bytes and the object.
 */
CacheKeyValue(String repositoryId, ObjId id, long expiresAtNanosEpoch) {
this(repositoryId, id, expiresAtNanosEpoch, null, null);
}

/**
 * Creates an instance holding all attributes.
 *
 * <p>The serialized bytes are kept via a regular (strong) field reference, whereas the given
 * object is only held through a {@link SoftReference}.
 *
 * @param repositoryId repository ID
 * @param id object ID
 * @param expiresAtNanosEpoch expiration timestamp
 * @param serialized serialized representation, may be {@code null}
 * @param object object to be softly referenced, may be {@code null}
 */
CacheKeyValue(
    String repositoryId, ObjId id, long expiresAtNanosEpoch, byte[] serialized, Object object) {
  // The single-argument SoftReference constructor registers no reference queue, which is
  // the same as explicitly passing a null queue.
  this.object = new SoftReference<>(object);
  this.serialized = serialized;
  this.expiresAtNanosEpoch = expiresAtNanosEpoch;
  this.id = id;
  this.repositoryId = repositoryId;
}

/**
 * Estimates the heap size of this instance.
 *
 * <p>The estimate is the sum of {@code OBJ_SIZE}, {@code STRING_OBJ_OVERHEAD} plus the length
 * of the repository ID, the heap size reported by the object ID, {@code ARRAY_OVERHEAD} plus
 * the array length if serialized bytes are present, and {@code SOFT_REFERENCE_OVERHEAD}. The
 * softly referenced object itself is not included.
 *
 * @return the estimated size
 */
int heapSize() {
  byte[] payload = serialized;
  int payloadSize = payload == null ? 0 : ARRAY_OVERHEAD + payload.length;
  return OBJ_SIZE
      + STRING_OBJ_OVERHEAD
      + repositoryId.length()
      + id.heapSize()
      + payloadSize
      + SOFT_REFERENCE_OVERHEAD;
}

Expand All @@ -311,6 +338,26 @@ public int hashCode() {
/** Renders the repository ID and the object ID as {@code {repositoryId, id}}. */
public String toString() {
  StringBuilder text = new StringBuilder();
  text.append("{").append(repositoryId);
  text.append(", ").append(id);
  text.append('}');
  return text.toString();
}

/**
 * Returns the cached value as an {@code Obj}.
 *
 * <p>If the soft reference still yields an instance, that instance is returned. Otherwise the
 * object is deserialized from the serialized bytes and a new soft reference to it is stored.
 *
 * @return the softly referenced or freshly deserialized object
 */
Obj getObj() {
  Object cached = this.object.get();
  if (cached != null) {
    return (Obj) cached;
  }
  Obj deserialized = ProtoSerialization.deserializeObj(id, 0L, this.serialized, null);
  // Plain field write on purpose - JMM side effects of re-creating the soft reference are
  // not a concern here.
  this.object = new SoftReference<>(deserialized);
  return deserialized;
}

/**
 * Returns the cached value as a {@code Reference}.
 *
 * <p>If the soft reference still yields an instance, that instance is returned. Otherwise the
 * reference is deserialized from the serialized bytes and a new soft reference to it is
 * stored.
 *
 * @return the softly referenced or freshly deserialized reference
 */
Reference getReference() {
  Object cached = this.object.get();
  if (cached != null) {
    return (Reference) cached;
  }
  Reference deserialized = deserializeReference(this.serialized);
  // Plain field write on purpose - JMM side effects of re-creating the soft reference are
  // not a concern here.
  this.object = new SoftReference<>(deserialized);
  return deserialized;
}
}

/*
Expand All @@ -321,15 +368,18 @@ public String toString() {
12 4 java.lang.String CacheKeyValue.repositoryId null
16 8 long CacheKeyValue.expiresAt 0
24 4 org.projectnessie.versioned.storage.common.persist.ObjId CacheKeyValue.id null
28 4 (object alignment gap)
Instance size: 32 bytes
28 4 byte[] CacheKeyValue.serialized null
32 4 java.lang.ref.Reference CacheKeyValue.object null
36 4 (object alignment gap)
Instance size: 40 bytes
Space losses: 0 bytes internal + 4 bytes external = 4 bytes total
*/
static final int OBJ_SIZE = 32;
static final int OBJ_SIZE = 40;
/*
Array overhead: 16 bytes
*/
static final int ARRAY_OVERHEAD = 16;
static final int SOFT_REFERENCE_OVERHEAD = 32;
/*
java.lang.String object internals:
OFF SZ TYPE DESCRIPTION VALUE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,8 @@ public void cachingObjectsExpiration() {
backend.put("repo", dynamicCachingObj);
backend.put("repo", stdObj);

ConcurrentMap<CaffeineCacheBackend.CacheKeyValue, byte[]> cacheMap = backend.cache.asMap();
ConcurrentMap<CaffeineCacheBackend.CacheKeyValue, CaffeineCacheBackend.CacheKeyValue> cacheMap =
backend.cache.asMap();

soft.assertThat(cacheMap)
.doesNotContainKey(CaffeineCacheBackend.cacheKey("repo", nonCachingObj.id()))
Expand Down

0 comments on commit 8307a13

Please sign in to comment.