From 89faab9e7731559af357ee0d78d805551bd57c3b Mon Sep 17 00:00:00 2001 From: Robert Stupp Date: Mon, 30 Sep 2024 15:13:43 -0500 Subject: [PATCH] Cache: keep (deserialized) object around (#9648) Refactor the cached-key-value to keep the deserialized object around leveraging a `java.lang.ref.SoftReference` to it. While this increases the overall heap pressure for each cached entity, the referenced objects are eligible for garbage collection, if Java GC decides to do so. This change continues to keep the serialized representation around (using a strong reference) - a "simple" `byte[]` is still far fewer individual Java objects than an "exploded" `Obj`, so less effort for GC. This change should help reduce the deserialization effort, especially for very frequently accessed objects, despite the semantics of `SoftReference`. --- CHANGELOG.md | 7 ++ .../quarkus/config/QuarkusStoreConfig.java | 2 +- .../versioned/storage/cache/CacheSizing.java | 2 +- .../storage/cache/CaffeineCacheBackend.java | 100 +++++++++++++----- .../storage/cache/TestCacheExpiration.java | 3 +- .../storage/cache/TestCacheSizing.java | 4 +- 6 files changed, 88 insertions(+), 30 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b13c20c2aab..36a59cba00d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,13 @@ as necessary. Empty sections will not end in the release notes. ### Changes +- The persistence cache tries to avoid deserialization overhead when getting an object from the + cache by using Java's `SoftReference`. There is no guarantee that cached objects keep their + Java object tree around, but this should hold for the majority of accesses to frequently + accessed cached objects. The default cache capacity fraction has been reduced from 70% of the + heap size to 60% of the heap size. However, extreme heap pressure may let Java GC clear all + `SoftReference`s. 
+ ### Deprecations ### Fixes diff --git a/servers/quarkus-config/src/main/java/org/projectnessie/quarkus/config/QuarkusStoreConfig.java b/servers/quarkus-config/src/main/java/org/projectnessie/quarkus/config/QuarkusStoreConfig.java index 6ee1a28f853..6d3bef2973d 100644 --- a/servers/quarkus-config/src/main/java/org/projectnessie/quarkus/config/QuarkusStoreConfig.java +++ b/servers/quarkus-config/src/main/java/org/projectnessie/quarkus/config/QuarkusStoreConfig.java @@ -135,7 +135,7 @@ public interface QuarkusStoreConfig extends StoreConfig { /** * Fraction of Java’s max heap size to use for cache objects, set to 0 to disable. Must not be * used with fixed cache sizing. If neither this value nor a fixed size is configured, a default - * of .7 (70%) is assumed. + * of {@code .6} (60%) is assumed. */ @WithName(CONFIG_CACHE_CAPACITY_FRACTION_OF_HEAP) OptionalDouble cacheCapacityFractionOfHeap(); diff --git a/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CacheSizing.java b/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CacheSizing.java index 1ffd8767675..07cf3a8cb93 100644 --- a/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CacheSizing.java +++ b/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CacheSizing.java @@ -27,7 +27,7 @@ public interface CacheSizing { int DEFAULT_HEAP_SIZE_KEEP_FREE = 256; int DEFAULT_MIN_SIZE_MB = 64; - double DEFAULT_HEAP_FRACTION = 0.7d; + double DEFAULT_HEAP_FRACTION = 0.6d; OptionalInt fixedSizeInMB(); diff --git a/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CaffeineCacheBackend.java b/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CaffeineCacheBackend.java index e55052fd549..c1ef6157a61 100644 --- a/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CaffeineCacheBackend.java +++ 
b/versioned/storage/cache/src/main/java/org/projectnessie/versioned/storage/cache/CaffeineCacheBackend.java @@ -31,6 +31,7 @@ import io.micrometer.core.instrument.Tag; import io.micrometer.core.instrument.binder.cache.CaffeineStatsCounter; import jakarta.annotation.Nonnull; +import java.lang.ref.SoftReference; import java.time.Duration; import org.checkerframework.checker.index.qual.NonNegative; import org.projectnessie.versioned.storage.common.exceptions.ObjTooLargeException; @@ -44,10 +45,11 @@ class CaffeineCacheBackend implements CacheBackend { public static final String CACHE_NAME = "nessie-objects"; - private static final byte[] NON_EXISTING_SENTINEL = "NON_EXISTING".getBytes(UTF_8); + private static final CacheKeyValue NON_EXISTING_SENTINEL = + new CacheKeyValue("x", ObjId.EMPTY_OBJ_ID, 0L, new byte[0], null); private final CacheConfig config; - final Cache cache; + final Cache cache; private final long refCacheTtlNanos; private final long refCacheNegativeTtlNanos; @@ -58,15 +60,15 @@ class CaffeineCacheBackend implements CacheBackend { refCacheTtlNanos = config.referenceTtl().orElse(Duration.ZERO).toNanos(); refCacheNegativeTtlNanos = config.referenceNegativeTtl().orElse(Duration.ZERO).toNanos(); - Caffeine cacheBuilder = + Caffeine cacheBuilder = Caffeine.newBuilder() .maximumWeight(config.capacityMb() * 1024L * 1024L) .weigher(this::weigher) .expireAfter( - new Expiry() { + new Expiry() { @Override public long expireAfterCreate( - CacheKeyValue key, byte[] value, long currentTimeNanos) { + CacheKeyValue key, CacheKeyValue value, long currentTimeNanos) { long expire = key.expiresAtNanosEpoch; if (expire == CACHE_UNLIMITED) { return Long.MAX_VALUE; @@ -81,7 +83,7 @@ public long expireAfterCreate( @Override public long expireAfterUpdate( CacheKeyValue key, - byte[] value, + CacheKeyValue value, long currentTimeNanos, @NonNegative long currentDurationNanos) { return expireAfterCreate(key, value, currentTimeNanos); @@ -90,7 +92,7 @@ public long 
expireAfterUpdate( @Override public long expireAfterRead( CacheKeyValue key, - byte[] value, + CacheKeyValue value, long currentTimeNanos, @NonNegative long currentDurationNanos) { return currentDurationNanos; @@ -118,11 +120,8 @@ public Persist wrap(@Nonnull Persist persist) { return new CachingPersistImpl(persist, cache); } - private int weigher(CacheKeyValue key, byte[] value) { + private int weigher(CacheKeyValue key, CacheKeyValue value) { int size = key.heapSize(); - if (value != null) { - size += ARRAY_OVERHEAD + value.length; - } size += CAFFEINE_OBJ_OVERHEAD; return size; } @@ -130,14 +129,14 @@ private int weigher(CacheKeyValue key, byte[] value) { @Override public Obj get(@Nonnull String repositoryId, @Nonnull ObjId id) { CacheKeyValue key = cacheKey(repositoryId, id); - byte[] value = cache.getIfPresent(key); + CacheKeyValue value = cache.getIfPresent(key); if (value == null) { return null; } if (value == NON_EXISTING_SENTINEL) { return NOT_FOUND_OBJ_SENTINEL; } - return ProtoSerialization.deserializeObj(id, 0L, value, null); + return value.getObj(); } @Override @@ -159,8 +158,9 @@ public void putLocal(@Nonnull String repositoryId, @Nonnull Obj obj) { byte[] serialized = serializeObj(obj, Integer.MAX_VALUE, Integer.MAX_VALUE, true); long expiresAtNanos = expiresAt == CACHE_UNLIMITED ? 
CACHE_UNLIMITED : MICROSECONDS.toNanos(expiresAt); - CacheKeyValue keyValue = cacheKeyValue(repositoryId, obj.id(), expiresAtNanos); - cache.put(keyValue, serialized); + CacheKeyValue keyValue = + cacheKeyValue(repositoryId, obj.id(), expiresAtNanos, serialized, obj); + cache.put(keyValue, keyValue); } catch (ObjTooLargeException e) { // this should never happen throw new RuntimeException(e); @@ -220,9 +220,14 @@ public void putReferenceLocal(@Nonnull String repositoryId, @Nonnull Reference r return; } ObjId id = refObjId(r.name()); - CacheKeyValue key = - cacheKeyValue(repositoryId, id, config.clockNanos().getAsLong() + refCacheTtlNanos); - cache.put(key, serializeReference(r)); + CacheKeyValue keyValue = + cacheKeyValue( + repositoryId, + id, + config.clockNanos().getAsLong() + refCacheTtlNanos, + serializeReference(r), + r); + cache.put(keyValue, keyValue); } @Override @@ -242,12 +247,14 @@ public Reference getReference(@Nonnull String repositoryId, @Nonnull String name return null; } ObjId id = refObjId(name); - CacheKeyValue keyValue = cacheKey(repositoryId, id); - byte[] bytes = cache.getIfPresent(keyValue); - if (bytes == NON_EXISTING_SENTINEL) { + CacheKeyValue value = cache.getIfPresent(cacheKey(repositoryId, id)); + if (value == null) { + return null; + } + if (value == NON_EXISTING_SENTINEL) { return NON_EXISTENT_REFERENCE_SENTINEL; } - return bytes != null ? deserializeReference(bytes) : null; + return value.getReference(); } static CacheKeyValue cacheKey(String repositoryId, ObjId id) { @@ -259,6 +266,11 @@ private static CacheKeyValue cacheKeyValue( return new CacheKeyValue(repositoryId, id, expiresAtNanosEpoch); } + private static CacheKeyValue cacheKeyValue( + String repositoryId, ObjId id, long expiresAtNanosEpoch, byte[] serialized, Object object) { + return new CacheKeyValue(repositoryId, id, expiresAtNanosEpoch, serialized, object); + } + /** * Class used for both the cache key and cache value including the expiration timestamp. 
This is * (should be) more efficient (think: mono-morphic vs bi-morphic call sizes) and more GC/heap @@ -273,20 +285,35 @@ static final class CacheKeyValue { // Revisit this field before 2262-04-11T23:47:16.854Z (64-bit signed long overflow) ;) ;) final long expiresAtNanosEpoch; + final byte[] serialized; + java.lang.ref.Reference object; + CacheKeyValue(String repositoryId, ObjId id) { this(repositoryId, id, 0L); } CacheKeyValue(String repositoryId, ObjId id, long expiresAtNanosEpoch) { + this(repositoryId, id, expiresAtNanosEpoch, null, null); + } + + CacheKeyValue( + String repositoryId, ObjId id, long expiresAtNanosEpoch, byte[] serialized, Object object) { this.repositoryId = repositoryId; this.id = id; this.expiresAtNanosEpoch = expiresAtNanosEpoch; + this.serialized = serialized; + this.object = new SoftReference<>(object, null); } int heapSize() { int size = OBJ_SIZE; size += STRING_OBJ_OVERHEAD + repositoryId.length(); size += id.heapSize(); + byte[] s = serialized; + if (s != null) { + size += ARRAY_OVERHEAD + s.length; + } + size += SOFT_REFERENCE_OVERHEAD; return size; } @@ -311,6 +338,26 @@ public int hashCode() { public String toString() { return "{" + repositoryId + ", " + id + '}'; } + + Obj getObj() { + Obj obj = (Obj) this.object.get(); + if (obj == null) { + obj = ProtoSerialization.deserializeObj(id, 0L, this.serialized, null); + // re-create the soft reference - but don't care about JMM side effects + this.object = new SoftReference<>(obj); + } + return obj; + } + + Reference getReference() { + Reference ref = (Reference) this.object.get(); + if (ref == null) { + ref = deserializeReference(this.serialized); + // re-create the soft reference - but don't care about JMM side effects + this.object = new SoftReference<>(ref); + } + return ref; + } } /* @@ -321,15 +368,18 @@ public String toString() { 12 4 java.lang.String CacheKeyValue.repositoryId null 16 8 long CacheKeyValue.expiresAt 0 24 4 org.projectnessie.versioned.storage.common.persist.ObjId 
CacheKeyValue.id null - 28 4 (object alignment gap) - Instance size: 32 bytes + 28 4 byte[] CacheKeyValue.serialized null + 32 4 java.lang.ref.Reference CacheKeyValue.object null + 36 4 (object alignment gap) + Instance size: 40 bytes Space losses: 0 bytes internal + 4 bytes external = 4 bytes total */ - static final int OBJ_SIZE = 32; + static final int OBJ_SIZE = 40; /* Array overhead: 16 bytes */ static final int ARRAY_OVERHEAD = 16; + static final int SOFT_REFERENCE_OVERHEAD = 32; /* java.lang.String object internals: OFF SZ TYPE DESCRIPTION VALUE diff --git a/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheExpiration.java b/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheExpiration.java index a2b5d514ef7..f4345580a5d 100644 --- a/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheExpiration.java +++ b/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheExpiration.java @@ -57,7 +57,8 @@ public void cachingObjectsExpiration() { backend.put("repo", dynamicCachingObj); backend.put("repo", stdObj); - ConcurrentMap cacheMap = backend.cache.asMap(); + ConcurrentMap cacheMap = + backend.cache.asMap(); soft.assertThat(cacheMap) .doesNotContainKey(CaffeineCacheBackend.cacheKey("repo", nonCachingObj.id())) diff --git a/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheSizing.java b/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheSizing.java index 71310bf1827..260922ead23 100644 --- a/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheSizing.java +++ b/versioned/storage/cache/src/test/java/org/projectnessie/versioned/storage/cache/TestCacheSizing.java @@ -82,14 +82,14 @@ void tinyHeapNoCache() { void defaultSettings4G() { // Assuming a 4G max heap, requesting 70% (358MB), sizing must yield 2867MB. 
soft.assertThat(CacheSizing.builder().build().calculateEffectiveSizeInMB(BYTES_4G)) - .isEqualTo(2867); + .isEqualTo(2457); } @Test void defaultSettings1G() { soft.assertThat(CacheSizing.builder().build().calculateEffectiveSizeInMB(BYTES_1G)) // 70 % of 1024 MB - .isEqualTo(716); + .isEqualTo(614); } @Test